Initial commit
This commit is contained in:
commit
9a8e738166
2 changed files with 62 additions and 0 deletions
60
osm_wikidata_check.py
Executable file
60
osm_wikidata_check.py
Executable file
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import re
|
||||
import requests
|
||||
from ipo import ipo, opi, p, read, flatten
|
||||
|
||||
|
||||
def ids():
    """Return the sorted, de-duplicated tokens read from standard input.

    Every element produced by ``read(sys.stdin)`` is split on tabs,
    semicolons and spaces; the pieces are stripped of surrounding
    whitespace and empty pieces are discarded.
    """
    # Splitter with the pattern pre-bound; the pipeline supplies the text.
    split_fields = p(re.split)(r"[\t; ]")

    tokens = (
        read(sys.stdin)
        | p(map)(split_fields)
        | flatten
        | p(map)(str.strip)
        # Keep only the non-empty tokens.
        | p(filter)(bool)
        | opi
    )

    unique_tokens = set(tokens)
    return sorted(unique_tokens)
|
||||
|
||||
|
||||
def first(iterable, default=None):
    """Return the first element of *iterable*, or *default* if it is empty."""
    for element in iterable:
        # Leaving on the very first pass consumes at most one element.
        return element
    return default
|
||||
|
||||
def _display_label(data):
    """Return a printable label for one entity record.

    Uses the record's ``labels`` mapping, or ``lemmas`` when ``labels`` is
    missing or empty. Dutch is preferred, then English, then whichever
    entry comes first. Labels longer than 46 characters are cut to 45
    characters plus an ellipsis.
    """
    names = data.get("labels") or data.get("lemmas") or {}
    entry = names.get("nl") or names.get("en") or first(names.values())
    if not entry:
        # Nothing usable: show a placeholder instead of aborting the run.
        return "(no label)"
    label = entry["value"]
    if len(label) > 46:
        # Slice to shorten; a ">45" format spec would only pad, never cut.
        label = f"{label[:45]}…"
    return label


s = requests.Session()

# IDs whose request did not succeed, and IDs answered under a different ID.
notfound = []
redirects = []

try:
    print(" Wikidata ID | Redirect/issue | Label")
    for wd_id in ids():
        r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
        if not r.ok:
            print(f"{wd_id:>15} | NOT FOUND! |\t")
            notfound.append(wd_id)
            continue

        redirect = ""
        entities = r.json()["entities"]
        try:
            data = entities[wd_id]
        except KeyError:
            # The requested ID is not a key of the response: treat the first
            # entity that was returned as the redirect target.
            new_id, data = first(entities.items())
            redirect = f"{new_id}"
            redirects.append(wd_id)

        print(f"{wd_id:>15} | {redirect:>15} | {_display_label(data)}")

finally:
    # Release pooled connections; the summary is printed even after an error
    # or an interrupt.
    s.close()
    print()
    print()
    print(f"Not found: {notfound}")
    print(f"Redirects: {redirects}")
    print()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
requests
|
||||
ipo
|
Loading…
Reference in a new issue