#!/usr/bin/env python3
"""Convert saved product-calendar HTML pages into JSON files.

Each output file maps a product name to a list of twelve integer levels,
one per month column of the calendar table. A level is an index into
``LEVELS``.
"""
import json

from bs4 import BeautifulSoup

# CSS class names that encode a cell's level. The position of a class name in
# this list is the integer written to the JSON output.
LEVELS = ["", "light", "normal", "dark"]

# Number of month cells every product row must yield.
_MONTHS_PER_YEAR = 12


def level_from_classes(classes):
    """Return the level encoded by the CSS classes of a table cell.

    Args:
        classes: The cell's CSS class names (may be empty).

    Returns:
        The ``LEVELS`` index of the first class that appears in ``LEVELS``.
        If no class matches but ``"active"`` is present, the index of
        ``"normal"``. Otherwise ``0``.
    """
    for class_ in classes:
        if class_ in LEVELS:
            return LEVELS.index(class_)
    # The site is inconsistent: class="active" on its own means the same as
    # class="active normal".
    if "active" in classes:
        return LEVELS.index("normal")
    return 0


def from_tr(tr):
    """Extract ``(name, months)`` from one row of the calendar table.

    Args:
        tr: A parsed ``<tr>`` element containing an element with class
            ``productcalendar__product`` that wraps an ``<a>`` tag.

    Returns:
        A tuple of the link text and a list of twelve integer levels.

    Raises:
        ValueError: If the row does not yield exactly twelve month cells.
    """
    name = tr.find(class_="productcalendar__product").a.text
    # Every <td> after the first two is treated as a month column.
    month_tds = tr.find_all("td")[2:]
    # A cell without a class attribute has no classes at all, hence level 0.
    months = [level_from_classes(td.get("class", [])) for td in month_tds]
    # Raise explicitly instead of using `assert`, which is removed under -O.
    if len(months) != _MONTHS_PER_YEAR:
        raise ValueError(
            f"expected {_MONTHS_PER_YEAR} month cells for {name!r}, "
            f"found {len(months)}"
        )
    return name, months


def calendar_from_soup(soup):
    """Build a ``{name: months}`` mapping from a parsed calendar page.

    Every ``<tr>`` inside the first element with class ``productcalendar``
    is converted with ``from_tr``. If a name occurs more than once, the last
    row wins.

    Raises:
        ValueError: If the page contains no ``productcalendar`` element.
    """
    table = soup.find(class_="productcalendar")
    if table is None:
        raise ValueError('no element with class "productcalendar" found')
    return dict(map(from_tr, table.find_all("tr")))


def html_to_json(fname_in, fname_out):
    """Parse the HTML file ``fname_in`` and write its calendar to ``fname_out``.

    Args:
        fname_in: Path of the saved HTML page to read.
        fname_out: Path of the JSON file to create or overwrite.
    """
    # Read bytes so BeautifulSoup detects the document's own encoding instead
    # of relying on the platform's default text encoding.
    with open(fname_in, "rb") as f_in:
        soup = BeautifulSoup(f_in, "html.parser")
    # Build the calendar before touching the output file, so a parse failure
    # does not leave a truncated JSON file behind.
    calendar = calendar_from_soup(soup)
    with open(fname_out, "w", encoding="utf-8") as f_out:
        json.dump(calendar, f_out)


def main():
    """Convert the two saved calendar pages into their JSON files."""
    html_to_json("./seizoenskalender-groenten.html", "./groenten.json")
    html_to_json("./seizoenskalender-fruit.html", "./fruit.json")


if __name__ == "__main__":
    main()