Initial commit

This commit is contained in:
Midgard 2022-12-18 21:18:01 +01:00
commit 9a8e738166
Signed by: midgard
GPG key ID: 511C112F1331BBB4
2 changed files with 62 additions and 0 deletions

60
osm_wikidata_check.py Executable file
View file

@ -0,0 +1,60 @@
#!/usr/bin/env python3
import sys
import re
import requests
from ipo import ipo, opi, p, read, flatten
def ids():
    """Return the sorted, de-duplicated identifiers found on standard input.

    Everything obtained from ``read(sys.stdin)`` is split on tabs,
    semicolons and spaces; each piece is stripped of surrounding whitespace
    and pieces that end up empty are dropped.

    NOTE(review): the pipeline relies on the ``ipo`` helpers -- presumably
    ``read`` yields the input lines, ``flatten`` merges the per-line lists
    into one stream and ``opi`` unwraps the piped value; confirm against the
    ipo documentation.
    """
    raw_input = read(sys.stdin)
    fields = raw_input | p(map)(p(re.split)(r"[\t; ]")) | flatten
    # Trim whitespace, then keep only the fields that are not empty
    non_empty = fields | p(map)(str.strip) | p(filter)(bool)
    unique = set(non_empty | opi)
    return sorted(unique)
def first(iterable, default=None):
    """Return the first item of *iterable*, or *default* when it is empty."""
    for item in iterable:
        # Leave as soon as one item has been produced.
        return item
    return default
# Script body: look up every ID produced by ids() on Wikidata and print one
# table row per ID, followed by a summary of failed and redirected IDs.

# A single HTTP session is reused for all lookups.
s = requests.Session()
# IDs whose request did not succeed.
notfound = []
# IDs whose response contained a different entity ID than the one requested.
redirects = []
try:
    print(" Wikidata ID | Redirect/issue | Label")
    for wd_id in ids():
        # A timeout keeps one stalled connection from hanging the whole run.
        r = s.get(f"https://www.wikidata.org/entity/{wd_id}", timeout=30)
        if not r.ok:
            print(f"{wd_id:>15} | NOT FOUND! |\t")
            notfound.append(wd_id)
            continue

        redirect = ""
        datapage = r.json()
        try:
            data = datapage["entities"][wd_id]
        except KeyError:
            # The requested ID is not a key of the response: use the first
            # entity that was returned and report its ID in the redirect
            # column.
            new_id, data = first(datapage["entities"].items())
            redirect = f"{new_id}"
            redirects.append(wd_id)

        # Entities without "labels" fall back to "lemmas"
        # (presumably lexeme entities -- TODO confirm).
        labels = data.get("labels") or data.get("lemmas") or {}
        # Prefer Dutch, then English, then whatever language is available, so
        # that an entity lacking both does not abort the remaining lookups.
        entry = labels.get("nl") or labels.get("en") or first(labels.values())
        label = entry["value"] if entry else "(no label)"
        if len(label) > 46:
            # Shorten over-long labels to 45 characters plus an ellipsis; the
            # previous right-align format spec never shortened anything.
            label = label[:45] + "…"
        print(f"{wd_id:>15} | {redirect:>15} | {label}")
finally:
    # The summary is printed even when the loop is interrupted or fails.
    print()
    print()
    print(f"Not found: {notfound}")
    print(f"Redirects: {redirects}")
    print()
    # Release the pooled connections held by the session.
    s.close()

2
requirements.txt Normal file
View file

@ -0,0 +1,2 @@
requests
ipo