Work with JSON dump instead of CSV
This commit is contained in:
parent
9a8e738166
commit
8fde521876
1 changed file with 13 additions and 5 deletions
|
@ -1,16 +1,20 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import re
|
import json
|
||||||
import requests
|
import requests
|
||||||
from ipo import ipo, opi, p, read, flatten
|
from ipo import ipo, opi, p, read, flatten
|
||||||
|
|
||||||
|
|
||||||
def ids():
|
def ids(elements):
|
||||||
return sorted(set(
|
return sorted(set(
|
||||||
read(sys.stdin) |
|
ipo(elements) |
|
||||||
|
p(map)(lambda x: x["tags"].items() if "tags" in x else []) |
|
||||||
|
flatten |
|
||||||
|
p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) |
|
||||||
|
p(map)(lambda kv: kv[1]) |
|
||||||
p(map)(
|
p(map)(
|
||||||
p(re.split)(r"[\t; ]")
|
p(str.split)(sep=";")
|
||||||
) |
|
) |
|
||||||
flatten |
|
flatten |
|
||||||
p(map)(str.strip) |
|
p(map)(str.strip) |
|
||||||
|
@ -27,12 +31,16 @@ def first(iterable, default=None):
|
||||||
|
|
||||||
s = requests.Session()
|
s = requests.Session()
|
||||||
|
|
||||||
|
data = json.load(sys.stdin)
|
||||||
|
assert data.get("version") == 0.6, "Expecting OpenStreetMap data on standard input"
|
||||||
|
assert "elements" in data
|
||||||
|
|
||||||
notfound = []
|
notfound = []
|
||||||
redirects = []
|
redirects = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print(" Wikidata ID | Redirect/issue | Label")
|
print(" Wikidata ID | Redirect/issue | Label")
|
||||||
for wd_id in ids():
|
for wd_id in ids(data["elements"]):
|
||||||
r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
|
r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
|
||||||
if not r.ok:
|
if not r.ok:
|
||||||
print(f"{wd_id:>15} | NOT FOUND! |\t")
|
print(f"{wd_id:>15} | NOT FOUND! |\t")
|
||||||
|
|
Loading…
Reference in a new issue