Un-HTML-escape data

This commit is contained in:
Midgard 2019-11-18 14:41:18 +01:00
parent 658cfa88d0
commit 5f6ef0e78d
Signed by untrusted user who does not match committer: midgard
GPG key ID: 511C112F1331BBB4

View file

@@ -17,6 +17,7 @@
 import requests
 import re
+from html import unescape
 MY_COUNTRY_CODE = "BE"
@@ -24,12 +25,15 @@ MY_COUNTRY_NAME = "Belgium"
 def search_all_meta(prop, value, page_content):
-    return re.findall(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
+    return map(
+        unescape,
+        re.findall(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
+    )
 def search_meta(prop, value, page_content):
     match = re.search(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
-    return match.group(1) if match else None
+    return unescape(match.group(1)) if match else None
 def str_exact_len(s, length, align="<"):