Work with JSON dump instead of CSV

This commit is contained in:
Midgard 2022-12-18 22:14:44 +01:00
parent 9a8e738166
commit 8fde521876
Signed by: midgard
GPG key ID: 511C112F1331BBB4

View file

@@ -1,16 +1,20 @@
#!/usr/bin/env python3
import sys
import re
import json
import requests
from ipo import ipo, opi, p, read, flatten
def ids():
def ids(elements):
return sorted(set(
read(sys.stdin) |
ipo(elements) |
p(map)(lambda x: x["tags"].items() if "tags" in x else []) |
flatten |
p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) |
p(map)(lambda kv: kv[1]) |
p(map)(
p(re.split)(r"[\t; ]")
p(str.split)(sep=";")
) |
flatten |
p(map)(str.strip) |
@@ -27,12 +31,16 @@ def first(iterable, default=None):
s = requests.Session()
data = json.load(sys.stdin)
assert data.get("version") == 0.6, "Expecting OpenStreetMap data on standard input"
assert "elements" in data
notfound = []
redirects = []
try:
print(" Wikidata ID | Redirect/issue | Label")
for wd_id in ids():
for wd_id in ids(data["elements"]):
r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
if not r.ok:
print(f"{wd_id:>15} | NOT FOUND! |\t")