diff --git a/osm_wikidata_check.py b/osm_wikidata_check.py index 08366a8..4b0a5ed 100755 --- a/osm_wikidata_check.py +++ b/osm_wikidata_check.py @@ -1,16 +1,20 @@ #!/usr/bin/env python3 import sys -import re +import json import requests from ipo import ipo, opi, p, read, flatten -def ids(): +def ids(elements): return sorted(set( - read(sys.stdin) | + ipo(elements) | + p(map)(lambda x: x["tags"].items() if "tags" in x else []) | + flatten | + p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) | + p(map)(lambda kv: kv[1]) | p(map)( - p(re.split)(r"[\t; ]") + p(str.split)(sep=";") ) | flatten | p(map)(str.strip) | @@ -27,12 +31,16 @@ def first(iterable, default=None): s = requests.Session() +data = json.load(sys.stdin) +assert data.get("version") == 0.6, "Expecting OpenStreetMap data on standard input" +assert "elements" in data + notfound = [] redirects = [] try: print(" Wikidata ID | Redirect/issue | Label") - for wd_id in ids(): + for wd_id in ids(data["elements"]): r = s.get(f"https://www.wikidata.org/entity/{wd_id}") if not r.ok: print(f"{wd_id:>15} | NOT FOUND! |\t")