groenten-en-fruit/html_to_json.py

56 lines
1.1 KiB
Python
Raw Normal View History

2020-10-16 15:41:22 +02:00
#!/usr/bin/env python3
from bs4 import BeautifulSoup
import json
# Availability levels; a level's numeric value is its position in this list.
# The empty string stands for "no level" and maps to 0.
LEVELS = ["", "light", "normal", "dark"]


def level_from_classes(classes):
    """Translate a cell's CSS classes into an index into ``LEVELS``.

    Args:
        classes: Iterable of CSS class names (an empty string is accepted
            and yields no classes).

    Returns:
        The ``LEVELS`` index of the first class that names a level. If no
        class names a level, the index of ``"normal"`` when ``"active"`` is
        present, otherwise 0.
    """
    for css_class in classes:
        if css_class in LEVELS:
            return LEVELS.index(css_class)
    # Site likes to mess with us: class="active normal" is the same as
    # class="active", so a bare "active" counts as "normal".
    return LEVELS.index("normal") if "active" in classes else 0
def from_tr(tr):
    """Extract a product's name and its twelve monthly levels from a table row.

    Args:
        tr: A parsed ``<tr>`` element. It must contain an element with class
            ``productcalendar__product`` wrapping an ``<a>`` whose text is
            the product name.

    Returns:
        A ``(name, months)`` tuple where ``months`` is a list of twelve
        level indices, one per ``<td>`` from the third cell onwards, as
        computed by ``level_from_classes``.

    Raises:
        ValueError: If the row does not yield exactly twelve month cells.
    """
    name = tr.find(class_="productcalendar__product").a.text
    # The first two cells are skipped; presumably they hold the product
    # label rather than month data -- the remaining cells must be 12 months.
    month_tds = tr.find_all("td")[2:]
    months = [level_from_classes(td.get("class", "")) for td in month_tds]
    # Explicit check instead of ``assert``: assertions are removed under
    # ``python -O`` and this validates external (scraped) input.
    if len(months) != 12:
        raise ValueError(
            "expected 12 month cells for product {!r}, found {}".format(
                name, len(months)
            )
        )
    return name, months
def calendar_from_soup(soup):
    """Collect every row of the product calendar into a dictionary.

    Args:
        soup: Parsed document containing an element with class
            ``productcalendar``.

    Returns:
        A dict mapping each product name to its list of monthly levels, as
        returned by ``from_tr`` for every ``<tr>`` inside that element. If a
        name occurs more than once, the last row wins.
    """
    table = soup.find(class_="productcalendar")
    calendar = {}
    # Calling a tag directly is BeautifulSoup shorthand for find_all().
    for row in table("tr"):
        product, levels = from_tr(row)
        calendar[product] = levels
    return calendar
def html_to_json(fname_in, fname_out):
    """Parse a product-calendar HTML file and write it out as JSON.

    Args:
        fname_in: Path of the HTML file to read.
        fname_out: Path of the JSON file to write (overwritten if present).

    The JSON document is the mapping produced by ``calendar_from_soup``.
    """
    # Read raw bytes so BeautifulSoup detects the document's encoding itself
    # rather than relying on the platform's locale default.
    with open(fname_in, "rb") as f_in:
        soup = BeautifulSoup(f_in, "html.parser")
    # Build the calendar before opening the output file, so a parsing error
    # cannot leave a truncated or empty JSON file behind.
    calendar = calendar_from_soup(soup)
    with open(fname_out, "w") as f_out:
        json.dump(calendar, f_out)
def main():
    """Convert the two calendar pages (groenten and fruit) to JSON files."""
    conversions = (
        ("./seizoenskalender-groenten.html", "./groenten.json"),
        ("./seizoenskalender-fruit.html", "./fruit.json"),
    )
    for html_path, json_path in conversions:
        html_to_json(html_path, json_path)


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()