2022-12-18 21:18:01 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import sys
|
2022-12-18 22:14:44 +01:00
|
|
|
import json
|
2022-12-18 21:18:01 +01:00
|
|
|
import requests
|
|
|
|
from ipo import ipo, opi, p, read, flatten
|
|
|
|
|
|
|
|
|
2022-12-18 22:14:44 +01:00
|
|
|
def ids(elements):
    """Return the sorted, de-duplicated Wikidata IDs referenced by *elements*.

    Every element's tags are scanned for the key ``wikidata`` or any key
    ending in ``:wikidata``. Each matching value is split on ``;``, the
    pieces are stripped of surrounding whitespace, and empty pieces are
    dropped. Elements without a ``"tags"`` key contribute nothing.

    The pipeline is built with the ``ipo`` package (``ipo``/``p``/``flatten``/
    ``opi``); presumably ``p(f)(...)`` pre-binds arguments and ``|`` feeds the
    piped value through each stage -- confirm against the ipo documentation.
    """
    def tag_items(element):
        # Elements without tags yield no (key, value) pairs.
        return element["tags"].items() if "tags" in element else []

    def is_wikidata_key(item):
        key = item[0]
        return key == "wikidata" or key.endswith(":wikidata")

    def tag_value(item):
        return item[1]

    # A single tag value may list several IDs separated by semicolons.
    split_on_semicolon = p(str.split)(sep=";")

    pipeline = (
        ipo(elements)
        | p(map)(tag_items)
        | flatten
        | p(filter)(is_wikidata_key)
        | p(map)(tag_value)
        | p(map)(split_on_semicolon)
        | flatten
        | p(map)(str.strip)
        # Keep only non-empty values
        | p(filter)(bool)
        | opi
    )
    return sorted(set(pipeline))
|
|
|
|
|
|
|
|
|
|
|
|
def first(iterable, default=None):
    """Return the first item produced by *iterable*, or *default* if there is none."""
    for item in iterable:
        # Stop at the very first item; nothing beyond it is consumed.
        return item
    return default
|
|
|
|
|
2022-12-18 22:39:35 +01:00
|
|
|
|
|
|
|
def overpass_query(wd_ids):
    """Build the text of an Overpass query selecting objects tagged with any of *wd_ids*.

    The result is three newline-separated parts: a settings line that opens a
    union, the selectors produced by ``overpass_selectors``, and a line that
    closes the union and emits the output statements.
    """
    header = " [out:json][timeout:25]; ("
    footer = "); out body; >; out skel qt;"
    return "\n".join((header, overpass_selectors(wd_ids), footer))
|
|
|
|
|
|
|
|
|
|
|
|
def overpass_selector(wd_id):
    """Return the two Overpass selector lines for one Wikidata ID.

    The first line selects on the plain ``wikidata`` key; the second selects
    on any key ending in ``:wikidata``. ``{{bbox}}`` is emitted literally
    (these are plain strings, not f-strings), presumably for a front end
    that substitutes the bounding box itself.
    """
    # NOTE(review): the second selector passes wd_id as a regular expression
    # for the value, so it looks like it would also match longer IDs that
    # contain wd_id -- confirm whether that is intended.
    plain_key = ' nwr["wikidata"="' + wd_id + '"]({{bbox}});'
    suffixed_key = ' nwr[~":wikidata$"~"' + wd_id + '"]({{bbox}});'
    return plain_key + "\n" + suffixed_key
|
2022-12-18 22:39:35 +01:00
|
|
|
|
|
|
|
|
|
|
|
def overpass_selectors(wd_ids):
    """Return the selectors for every ID in *wd_ids*, one selector group per line."""
    return "\n".join(map(overpass_selector, wd_ids))
|
|
|
|
|
|
|
|
|
2022-12-18 21:18:01 +01:00
|
|
|
# One session is shared by all requests made by the script.
s = requests.Session()

# The OpenStreetMap JSON document is read from standard input.
data = json.load(sys.stdin)
# Validate explicitly instead of with `assert`: assertions are skipped when
# Python runs with -O, and a non-object document would otherwise fail with
# an unrelated AttributeError on `.get`.
if not isinstance(data, dict) or data.get("version") != 0.6:
    sys.exit("Expecting OpenStreetMap data on standard input")
if "elements" not in data:
    sys.exit('Expecting OpenStreetMap data on standard input: no "elements" key')

# IDs for which the lookup failed, and IDs answered under a different ID.
notfound = []
redirects = []

# Headers sent with every Wikidata request; JSON is requested explicitly.
headers = {
    "Accept-Encoding": "gzip,deflate",
    "Accept": "application/json",
    "User-Agent": "osm_wikidata_check"
}
|
|
|
|
|
2022-12-18 21:18:01 +01:00
|
|
|
# Look up every Wikidata ID found in the input and print one table row per ID.
# The summary lives in `finally` so it is still printed when the loop is
# interrupted or fails part-way through.
try:
    print(" Wikidata ID | Redirect/issue | Label")
    for wd_id in ids(data["elements"]):
        # A timeout keeps one stalled connection from hanging the whole run.
        r = s.get(
            f"https://www.wikidata.org/entity/{wd_id}",
            headers=headers,
            timeout=30,
        )
        if not r.ok:
            print(f"{wd_id:>15} | NOT FOUND! |\t")
            notfound.append(wd_id)
            continue

        redirect = ""
        datapage = r.json()
        # `entity` (not `data`) so the OSM document read from stdin is not rebound.
        try:
            entity = datapage["entities"][wd_id]
        except KeyError:
            # The response is keyed by a different ID than the one requested:
            # report that ID in the redirect column.
            new_id, entity = first(datapage["entities"].items())
            redirect = f"{new_id}"
            redirects.append(wd_id)

        labels = entity.get("labels") or entity.get("lemmas") or {}
        # Prefer Dutch, then English, then whatever language is available, so
        # an entity lacking both does not abort the run with a KeyError.
        chosen = labels.get("nl") or labels.get("en") or first(labels.values())
        label = chosen["value"] if chosen else "(no label)"
        if len(label) > 46:
            # Truncate to 45 characters plus an ellipsis.
            label = f"{label:>.45}…"
        print(f"{wd_id:>15} | {redirect:>15} | {label}")

finally:
    print()
    if notfound:
        print("Not found:")
        print(overpass_query(notfound))
        print()
    if redirects:
        print(f"Redirects: {redirects}")
        print(overpass_query(redirects))
        print()
|