From 8fde521876c81c7559e5987c5fdd273906a3d742 Mon Sep 17 00:00:00 2001 From: Midgard Date: Sun, 18 Dec 2022 22:14:44 +0100 Subject: [PATCH] Work with JSON dump instead of CSV --- osm_wikidata_check.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/osm_wikidata_check.py b/osm_wikidata_check.py index 08366a8..4b0a5ed 100755 --- a/osm_wikidata_check.py +++ b/osm_wikidata_check.py @@ -1,16 +1,20 @@ #!/usr/bin/env python3 import sys -import re +import json import requests from ipo import ipo, opi, p, read, flatten -def ids(): +def ids(elements): return sorted(set( - read(sys.stdin) | + ipo(elements) | + p(map)(lambda x: x["tags"].items() if "tags" in x else []) | + flatten | + p(filter)(lambda kv: kv[0] == "wikidata" or kv[0].endswith(":wikidata")) | + p(map)(lambda kv: kv[1]) | p(map)( - p(re.split)(r"[\t; ]") + p(str.split)(sep=";") ) | flatten | p(map)(str.strip) | @@ -27,12 +31,16 @@ def first(iterable, default=None): s = requests.Session() +data = json.load(sys.stdin) +assert data.get("version") == 0.6, "Expecting OpenStreetMap data on standard input" +assert "elements" in data + notfound = [] redirects = [] try: print(" Wikidata ID | Redirect/issue | Label") - for wd_id in ids(): + for wd_id in ids(data["elements"]): r = s.get(f"https://www.wikidata.org/entity/{wd_id}") if not r.ok: print(f"{wd_id:>15} | NOT FOUND! |\t")