Un-HTML-escape data
This commit is contained in:
parent
658cfa88d0
commit
5f6ef0e78d
1 changed file with 6 additions and 2 deletions
|
@@ -17,6 +17,7 @@
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import re
|
import re
|
||||||
|
from html import unescape
|
||||||
|
|
||||||
|
|
||||||
MY_COUNTRY_CODE = "BE"
|
MY_COUNTRY_CODE = "BE"
|
||||||
|
@@ -24,12 +25,15 @@ MY_COUNTRY_NAME = "Belgium"
|
||||||
|
|
||||||
|
|
||||||
def search_all_meta(prop, value, page_content):
|
def search_all_meta(prop, value, page_content):
|
||||||
return re.findall(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
return map(
|
||||||
|
unescape,
|
||||||
|
re.findall(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def search_meta(prop, value, page_content):
|
def search_meta(prop, value, page_content):
|
||||||
match = re.search(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
match = re.search(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
||||||
return match.group(1) if match else None
|
return unescape(match.group(1)) if match else None
|
||||||
|
|
||||||
|
|
||||||
def str_exact_len(s, length, align="<"):
|
def str_exact_len(s, length, align="<"):
|
||||||
|
|
Loading…
Reference in a new issue