Initial commit
This commit is contained in:
commit
9a8e738166
2 changed files with 62 additions and 0 deletions
60
osm_wikidata_check.py
Executable file
60
osm_wikidata_check.py
Executable file
|
@ -0,0 +1,60 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from ipo import ipo, opi, p, read, flatten
|
||||||
|
|
||||||
|
|
||||||
|
def ids():
    """Return the sorted, de-duplicated tokens read from standard input.

    Each input item is split on tab, semicolon and space characters, the
    pieces are stripped of surrounding whitespace, and empty pieces are
    discarded.  The result is a list in ascending string order.

    NOTE(review): ``read``, ``p``, ``flatten`` and ``opi`` come from the
    third-party ``ipo`` package; ``read(sys.stdin)`` presumably yields the
    input line by line -- confirm against the ipo documentation.
    """
    # Splitter applied to every input item; the item is supplied by the pipe.
    split_fields = p(re.split)(r"[\t; ]")

    tokens = (
        read(sys.stdin)
        | p(map)(split_fields)
        | flatten
        | p(map)(str.strip)
        # Keep only the non-empty tokens.
        | p(filter)(bool)
        | opi
    )
    return sorted(set(tokens))
|
||||||
|
|
||||||
|
|
||||||
|
def first(iterable, default=None):
    """Return the first item of *iterable*, or *default* if it is empty."""
    for item in iterable:
        return item
    return default
|
||||||
|
|
||||||
|
# Script body: fetch every ID returned by ids() from wikidata.org and print
# one table row per ID.
#
#   * request not OK                      -> row marked "NOT FOUND!"
#   * ID missing from the "entities" map  -> treated as a redirect; the ID
#                                            the response is keyed by is shown
#   * otherwise                           -> row with the entity's label
#
# The lists of problem IDs are printed in the ``finally`` clause, so the
# summary also appears when the run is interrupted or fails half-way.

# A single session is shared by all lookups.
s = requests.Session()

notfound = []
redirects = []

try:
    # Header cells use the same 15-wide, right-aligned columns as the rows.
    print(f"{'Wikidata ID':>15} | {'Redirect/issue':>15} | Label")
    for wd_id in ids():
        r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
        if not r.ok:
            print(f"{wd_id:>15} | {'NOT FOUND!':>15} |\t")
            notfound.append(wd_id)
            continue

        redirect = ""
        datapage = r.json()
        try:
            data = datapage["entities"][wd_id]
        except KeyError:
            # The response is keyed by a different ID than the one requested;
            # report that ID in the redirect column.
            new_id, data = first(datapage["entities"].items())
            redirect = f"{new_id}"
            redirects.append(wd_id)

        # "labels" is tried first and "lemmas" second; an entity carrying
        # neither must not abort the whole run.
        labels = data.get("labels") or data.get("lemmas") or {}
        # Prefer Dutch, then English, then whatever language is available.
        entry = labels.get("nl") or labels.get("en") or first(labels.values())
        label = entry["value"] if entry else "(no label)"

        if len(label) > 46:
            # Cut to 45 characters plus an ellipsis (46 in total).  A width
            # spec such as ``:>45`` only pads and never shortens the text.
            label = f"{label[:45]}…"
        print(f"{wd_id:>15} | {redirect:>15} | {label}")

finally:
    print()
    print()
    print(f"Not found: {notfound}")
    print(f"Redirects: {redirects}")
    print()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
requests
|
||||||
|
ipo
|
Loading…
Reference in a new issue