Work with JSON dump instead of CSV

This commit is contained in:
Midgard 2022-12-18 22:14:44 +01:00
parent 9a8e738166
commit 8fde521876
Signed by: midgard
GPG key ID: 511C112F1331BBB4

View file

@ -1,16 +1,20 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys import sys
import re import json
import requests import requests
from ipo import ipo, opi, p, read, flatten from ipo import ipo, opi, p, read, flatten
def ids(): def ids(elements):
return sorted(set( return sorted(set(
read(sys.stdin) | ipo(elements) |
p(map)(lambda x: x["tags"].items() if "tags" in x else []) |
flatten |
p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) |
p(map)(lambda kv: kv[1]) |
p(map)( p(map)(
p(re.split)(r"[\t; ]") p(str.split)(sep=";")
) | ) |
flatten | flatten |
p(map)(str.strip) | p(map)(str.strip) |
@ -27,12 +31,16 @@ def first(iterable, default=None):
s = requests.Session() s = requests.Session()
data = json.load(sys.stdin)
assert data.get("version") == 0.6, "Expecting OpenStreetMap data on standard input"
assert "elements" in data
notfound = [] notfound = []
redirects = [] redirects = []
try: try:
print(" Wikidata ID | Redirect/issue | Label") print(" Wikidata ID | Redirect/issue | Label")
for wd_id in ids(): for wd_id in ids(data["elements"]):
r = s.get(f"https://www.wikidata.org/entity/{wd_id}") r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
if not r.ok: if not r.ok:
print(f"{wd_id:>15} | NOT FOUND! |\t") print(f"{wd_id:>15} | NOT FOUND! |\t")