#!/usr/bin/env python3
"""Check the Wikidata IDs referenced by OpenStreetMap elements.

Reads OpenStreetMap JSON (version 0.6) from standard input, collects the
values of every ``wikidata`` / ``*:wikidata`` tag, requests each ID from
www.wikidata.org and prints one table row per ID with its redirect target
(if any) and label.  At the end, Overpass queries are printed for the IDs
that were not found and for the IDs that were served under another ID.
"""
import sys
import json

import requests
from ipo import ipo, opi, p, read, flatten

# Seconds to wait for Wikidata before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30


def ids(elements):
    """Return the sorted, de-duplicated Wikidata IDs found in *elements*.

    Each element may carry a ``"tags"`` mapping.  The values of the
    ``wikidata`` tag and of every tag whose key ends in ``:wikidata`` are
    split on ``;``, stripped, and empty fragments are discarded.
    """
    # NOTE(review): ipo/opi presumably open and close the pipeline and
    # p(...) presumably builds a partially-applied stage — confirm against
    # the ipo library.
    return sorted(set(
        ipo(elements) |
        p(map)(lambda x: x["tags"].items() if "tags" in x else []) |
        flatten |
        p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) |
        p(map)(lambda kv: kv[1]) |
        p(map)(
            p(str.split)(sep=";")
        ) |
        flatten |
        p(map)(str.strip) |
        # Drop empty fragments (e.g. produced by a trailing ";")
        p(filter)(bool) |
        opi
    ))


def first(iterable, default=None):
    """Return the first item of *iterable*, or *default* if it is empty."""
    return next(iter(iterable), default)


def overpass_query(wd_ids):
    """Return an Overpass query selecting the elements tagged with *wd_ids*."""
    return (
        " [out:json][timeout:25]; ( "
        f"{overpass_selectors(wd_ids)}"
        " ); out body; >; out skel qt;"
    )


def overpass_selector(wd_id):
    """Return the two Overpass selector lines for a single Wikidata ID.

    The first line matches an exact ``wikidata`` tag value; the second
    matches *wd_id* in any tag whose key ends in ``:wikidata``.
    """
    # "{{bbox}}" is emitted literally (this is not an f-string); presumably
    # it is the Overpass Turbo bounding-box placeholder.
    # NOTE(review): the value pattern in the second selector is unanchored,
    # so it presumably also matches longer IDs that contain wd_id (e.g. Q12
    # inside Q123) — confirm against Overpass regex semantics.
    return ' nwr["wikidata"="' + wd_id + '"]({{bbox}});\n nwr[~":wikidata$"~"' + wd_id + '"]({{bbox}});'


def overpass_selectors(wd_ids):
    """Return the selectors for all *wd_ids*, one ID per line pair."""
    return "\n".join(overpass_selector(wd_id) for wd_id in wd_ids)


s = requests.Session()

data = json.load(sys.stdin)
# Explicit check instead of ``assert`` so validation survives ``python -O``.
if (
    not isinstance(data, dict)
    or not data.get("version") == 0.6
    or "elements" not in data
):
    sys.exit("Expecting OpenStreetMap data on standard input")

notfound = []
redirects = []
headers = {
    "Accept-Encoding": "gzip,deflate",
    "Accept": "application/json",
    "User-Agent": "osm_wikidata_check"
}

try:
    print(" Wikidata ID | Redirect/issue | Label")
    for wd_id in ids(data["elements"]):
        r = s.get(
            f"https://www.wikidata.org/entity/{wd_id}",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if not r.ok:
            print(f"{wd_id:>15} | NOT FOUND! |\t")
            notfound.append(wd_id)
            continue

        redirect = ""
        entities = r.json()["entities"]
        try:
            entity = entities[wd_id]
        except KeyError:
            # The requested ID is not a key of the response: the entity is
            # served under another ID, which is reported as a redirect.
            new_id, entity = first(entities.items(), (None, None))
            if new_id is None:
                # Response carries no entity at all; report it like a miss
                # instead of crashing on the tuple unpacking.
                print(f"{wd_id:>15} | NOT FOUND! |\t")
                notfound.append(wd_id)
                continue
            redirect = f"{new_id}"
            redirects.append(wd_id)

        # Use "labels", falling back to "lemmas" when labels are absent or
        # empty; tolerate entities that have neither.
        labels = entity.get("labels") or entity.get("lemmas") or {}
        # Prefer Dutch, then English, then whatever label exists, so that an
        # entity without nl/en labels no longer aborts the whole run.
        label_entry = labels.get("nl") or labels.get("en") or first(labels.values(), {})
        label = label_entry.get("value", "")
        if len(label) > 46:
            # Keep the first 45 characters and mark the truncation.
            label = f"{label:>.45}…"
        print(f"{wd_id:>15} | {redirect:>15} | {label}")
finally:
    # Always print the summary, even when the loop was interrupted.
    s.close()
    print()
    if notfound:
        print("Not found:")
        print(overpass_query(notfound))
        print()
    if redirects:
        print(f"Redirects: {redirects}")
        print(overpass_query(redirects))
        print()