osm_wikidata_check/osm_wikidata_check.py
2022-12-18 22:15:36 +01:00

74 lines
1.6 KiB
Python
Executable file

#!/usr/bin/env python3
import sys
import json
import requests
from ipo import ipo, opi, p, read, flatten
def ids(elements):
    """Return the sorted, de-duplicated Wikidata IDs referenced by *elements*.

    Only tags whose key is ``wikidata`` or ends in ``:wikidata`` are
    considered.  A single tag value may list several IDs separated by ``;``;
    each piece is stripped of surrounding whitespace and empty pieces are
    discarded.  Elements without a ``tags`` key contribute nothing.
    """
    found = set()
    for element in elements:
        if "tags" not in element:
            continue
        for key, value in element["tags"].items():
            if key != "wikidata" and not key.endswith(":wikidata"):
                continue
            for piece in str.split(value, sep=";"):
                candidate = str.strip(piece)
                # Skip the empty strings left by stray or trailing semicolons.
                if candidate:
                    found.add(candidate)
    return sorted(found)
def first(iterable, default=None):
    """Return the first item of *iterable*, or *default* if it yields nothing."""
    for item in iterable:
        return item
    return default
# Languages tried, in order of preference, when choosing a label to display.
PREFERRED_LANGUAGES = ("nl", "en")
# Labels longer than this many characters are cut short and end in "…".
MAX_LABEL_LENGTH = 46
# Seconds to wait for Wikidata per request; requests applies no timeout by
# default, so without this a stalled connection would hang the run forever.
REQUEST_TIMEOUT = 30


def _display_label(entity):
    """Return a printable label for a Wikidata entity dict, or "" if none.

    The entity's "labels" mapping is used, falling back to "lemmas" when
    "labels" is missing or empty (presumably the lexeme case).  Languages in
    PREFERRED_LANGUAGES win; otherwise any available language is shown
    instead of raising KeyError.  Over-long labels are truncated to
    MAX_LABEL_LENGTH characters including a trailing ellipsis.
    """
    terms = entity.get("labels") or entity.get("lemmas") or {}
    for language in PREFERRED_LANGUAGES:
        if language in terms:
            label = terms[language]["value"]
            break
    else:
        # No preferred language present: show whatever exists rather than
        # aborting the whole check on one entity.
        label = next((term["value"] for term in terms.values()), "")
    if len(label) > MAX_LABEL_LENGTH:
        label = label[:MAX_LABEL_LENGTH - 1] + "…"
    return label


s = requests.Session()
data = json.load(sys.stdin)
# Explicit checks rather than assert: assertions are stripped under `python -O`.
if not isinstance(data, dict) or data.get("version") != 0.6:
    sys.exit("Expecting OpenStreetMap data on standard input")
if "elements" not in data:
    sys.exit('Expecting an "elements" list in the OpenStreetMap data')
notfound = []
redirects = []
headers = {
    "Accept-Encoding": "gzip,deflate",
    "Accept": "application/json",
    "User-Agent": "osm_wikidata_check",
}
try:
    # Header cells use the same 15-character width as the data rows below.
    print(f"{'Wikidata ID':>15} | {'Redirect/issue':>15} | Label")
    for wd_id in ids(data["elements"]):
        r = s.get(
            f"https://www.wikidata.org/entity/{wd_id}",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        # Any non-2xx answer is reported as not found, as is a successful
        # response that carries no entities at all.
        entities = r.json().get("entities") if r.ok else None
        if not entities:
            print(f"{wd_id:>15} | {'NOT FOUND!':>15} |\t")
            notfound.append(wd_id)
            continue
        redirect = ""
        entity = entities.get(wd_id)
        if entity is None:
            # The requested ID is absent from the response, so Wikidata
            # answered with a different entity: treat that as the redirect
            # target.
            redirect, entity = first(entities.items())
            redirects.append(wd_id)
        print(f"{wd_id:>15} | {redirect:>15} | {_display_label(entity)}")
finally:
    # Always print the summary, even when the run is interrupted or fails.
    print()
    print()
    print(f"Not found: {notfound}")
    print(f"Redirects: {redirects}")
    print()