Work with JSON dump instead of CSV

2022-12-18 22:14:44 +01:00 · 2022-12-18 22:14:44 +01:00 · 8fde521876
commit 8fde521876
parent 9a8e738166
1 changed file with 13 additions and 5 deletions
--- a/osm_wikidata_check.py
+++ b/osm_wikidata_check.py
@@ -1,16 +1,20 @@
 #!/usr/bin/env python3
 import sys
-import re
+import json
 import requests
 from ipo import ipo, opi, p, read, flatten
-def ids():
+def ids(elements):
 	return sorted(set(
-		read(sys.stdin) |
+		ipo(elements) |
 		p(map)(lambda x: x["tags"].items() if "tags" in x else []) |
 		flatten |
 		p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) |
 		p(map)(lambda kv: kv[1]) |
 		p(map)(
-			p(re.split)(r"[\t; ]")
+			p(str.split)(sep=";")
 		) |
 		flatten |
 		p(map)(str.strip) |
@@ -27,12 +31,16 @@ def first(iterable, default=None):
 s = requests.Session()
 data = json.load(sys.stdin)
 assert data.get("version") == 0.6, "Expecting OpenStreetMap data on standard input"
 assert "elements" in data
 notfound = []
 redirects = []
 try:
 	print("    Wikidata ID |  Redirect/issue | Label")
-	for wd_id in ids():
+	for wd_id in ids(data["elements"]):
 		r = s.get(f"https://www.wikidata.org/entity/{wd_id}")
 		if not r.ok:
 			print(f"{wd_id:>15} |      NOT FOUND! |\t")