Un-HTML-escape data
This commit is contained in:
parent
658cfa88d0
commit
5f6ef0e78d
1 changed file with 6 additions and 2 deletions
|
@@ -17,6 +17,7 @@
|
|||
|
||||
import requests
|
||||
import re
|
||||
from html import unescape
|
||||
|
||||
|
||||
MY_COUNTRY_CODE = "BE"
|
||||
|
@@ -24,12 +25,15 @@ MY_COUNTRY_NAME = "Belgium"
|
|||
|
||||
|
||||
def search_all_meta(prop, value, page_content):
|
||||
return re.findall(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
||||
return map(
|
||||
unescape,
|
||||
re.findall(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
||||
)
|
||||
|
||||
|
||||
def search_meta(prop, value, page_content):
|
||||
match = re.search(r'<meta property="{}" content="({})" />'.format(prop, value), page_content)
|
||||
return match.group(1) if match else None
|
||||
return unescape(match.group(1)) if match else None
|
||||
|
||||
|
||||
def str_exact_len(s, length, align="<"):
|
||||
|
|
Loading…
Reference in a new issue