56 lines
1.1 KiB
Python
56 lines
1.1 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
from bs4 import BeautifulSoup
|
||
|
import json
|
||
|
|
||
|
|
||
|
# CSS class names that level_from_classes() recognises on a calendar cell.
# A name's position in this list is the numeric level reported for it;
# index 0 (the empty string) coincides with the "no level" fallback value.
LEVELS = [
    "",
    "light",
    "normal",
    "dark",
]
|
||
|
|
||
|
|
||
|
def level_from_classes(classes):
    """Map a cell's CSS classes to a numeric level (an index into LEVELS).

    The first entry of ``classes`` that is listed in LEVELS decides the
    result.  If none is listed, a cell marked "active" counts as "normal",
    and anything else yields 0.
    """
    recognised = next((name for name in classes if name in LEVELS), None)
    if recognised is not None:
        return LEVELS.index(recognised)

    # Site likes to mess with us: class="active normal" is the same as
    # class="active", so a bare "active" is treated as the "normal" level.
    if "active" not in classes:
        return 0
    return LEVELS.index("normal")
|
||
|
|
||
|
|
||
|
def from_tr(tr):
    """Extract a product name and its twelve monthly levels from a table row.

    Args:
        tr: Row element offering ``find`` and ``find_all`` (presumably a
            BeautifulSoup ``<tr>`` tag -- confirm against the caller).

    Returns:
        A ``(name, months)`` tuple: ``name`` is the text of the link inside
        the element with class ``productcalendar__product``; ``months`` is a
        list of twelve integer levels as computed by level_from_classes().

    Raises:
        AssertionError: If the row does not yield exactly twelve month cells.
    """
    name = tr.find(class_="productcalendar__product").a.text

    # The first two cells are skipped; presumably they hold product info
    # rather than month data -- verify against the page markup.
    month_tds = tr.find_all("td")[2:]
    months = [level_from_classes(td.get("class", "")) for td in month_tds]

    # Raised explicitly rather than through an `assert` statement so the check
    # still runs under `python -O`; the exception type is kept as
    # AssertionError so existing callers see the same failure.
    if len(months) != 12:
        raise AssertionError(
            "expected 12 month cells for {!r}, found {}".format(name, len(months))
        )

    return name, months
|
||
|
|
||
|
|
||
|
def calendar_from_soup(soup):
    """Build a ``{product name: monthly levels}`` dict from a parsed page.

    Every ``tr`` found under the element with class ``productcalendar`` is
    passed through from_tr(); a later row with the same name replaces an
    earlier one.
    """
    table = soup.find(class_="productcalendar")

    calendar = {}
    # Calling the table element with "tr" is how the rows are looked up.
    for row in table("tr"):
        product, levels = from_tr(row)
        calendar[product] = levels
    return calendar
|
||
|
|
||
|
|
||
|
def html_to_json(fname_in, fname_out):
    """Convert a saved calendar HTML page into a JSON file.

    Args:
        fname_in: Path of the HTML file to read.
        fname_out: Path of the JSON file to write (overwritten if present).

    The JSON document is the dict produced by calendar_from_soup().
    """
    # Read raw bytes so the HTML parser can detect the document's own
    # encoding instead of depending on the platform's default text encoding.
    with open(fname_in, "rb") as f_in:
        soup = BeautifulSoup(f_in, "html.parser")

    # Build the calendar before touching the output file, so a malformed
    # page does not leave a truncated or empty JSON file behind.
    calendar = calendar_from_soup(soup)

    with open(fname_out, "w", encoding="utf-8") as f_out:
        json.dump(calendar, f_out)
|
||
|
|
||
|
|
||
|
def main():
    """Convert both saved calendar pages in the current directory to JSON."""
    conversions = (
        ("./seizoenskalender-groenten.html", "./groenten.json"),
        ("./seizoenskalender-fruit.html", "./fruit.json"),
    )
    for html_path, json_path in conversions:
        html_to_json(html_path, json_path)
|
||
|
|
||
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|