From 9a8e73816673a87224cc3c2048883ac1bf7cd3bd Mon Sep 17 00:00:00 2001
From: Midgard
Date: Sun, 18 Dec 2022 21:18:01 +0100
Subject: [PATCH] Initial commit

---
 osm_wikidata_check.py | 137 ++++++++++++++++++++++++++++++++++++++++++
 requirements.txt      |   2 +
 2 files changed, 139 insertions(+)
 create mode 100755 osm_wikidata_check.py
 create mode 100644 requirements.txt

diff --git a/osm_wikidata_check.py b/osm_wikidata_check.py
new file mode 100755
index 0000000..08366a8
--- /dev/null
+++ b/osm_wikidata_check.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""Check Wikidata IDs read from stdin against wikidata.org.
+
+Prints one table row per unique ID with its label and, where applicable, the
+ID it redirects to or the problem found, followed by a summary of the IDs that
+were not found, redirected, or could not be checked.
+"""
+
+import sys
+import re
+from urllib.parse import quote
+
+import requests
+from ipo import ipo, opi, p, read, flatten
+
+
+ENTITY_URL = "https://www.wikidata.org/entity/{}"
+# Seconds. requests waits forever unless a timeout is given.
+TIMEOUT = 30
+# Longest label printed in full; longer ones are cut to this width.
+MAX_LABEL = 46
+
+
+def ids():
+    """Return the sorted, de-duplicated IDs found on stdin.
+
+    Every item that ``read`` yields from stdin is split on tabs, semicolons
+    and spaces; the pieces are stripped and empty ones are dropped.
+    """
+    return sorted(set(
+        read(sys.stdin) |
+        p(map)(
+            p(re.split)(r"[\t; ]")
+        ) |
+        flatten |
+        p(map)(str.strip) |
+
+        # Drop empty tokens (blank input, doubled separators)
+        p(filter)(bool) |
+
+        opi
+    ))
+
+
+def first(iterable, default=None):
+    """Return the first item of *iterable*, or *default* if it is empty."""
+    return next(iter(iterable), default)
+
+
+def best_label(data):
+    """Return a display label for entity *data*, or "" if it has none.
+
+    Uses "labels", falling back to "lemmas"; prefers Dutch, then English,
+    then whichever language is listed first.
+    """
+    labels = data.get("labels") or data.get("lemmas") or {}
+    chosen = labels.get("nl") or labels.get("en") or first(labels.values())
+    return chosen["value"] if chosen else ""
+
+
+def shorten(label):
+    """Cut *label* to MAX_LABEL characters, ending in "…" when shortened."""
+    if len(label) > MAX_LABEL:
+        return label[:MAX_LABEL - 1] + "…"
+    return label
+
+
+def lookup(s, wd_id):
+    """Fetch one entity and return ``(status, issue, label)``.
+
+    *status* is "ok", "redirect", "notfound" or "failed". *issue* is the text
+    for the "Redirect/issue" column. *label* is None when no entity data was
+    obtained.
+    """
+    # The ID comes straight from stdin: quote it so it cannot alter the URL.
+    url = ENTITY_URL.format(quote(wd_id, safe=""))
+    try:
+        r = s.get(url, timeout=TIMEOUT)
+    except requests.RequestException as e:
+        return "failed", type(e).__name__, None
+
+    # 400 is treated like 404 on the assumption that it means a malformed ID.
+    if r.status_code in (400, 404):
+        return "notfound", "NOT FOUND!", None
+    if not r.ok:
+        # E.g. 429 or 5xx: the entity may well exist, so don't call it missing.
+        return "failed", f"HTTP {r.status_code}!", None
+
+    try:
+        entities = r.json()["entities"]
+    except (ValueError, KeyError, TypeError):
+        return "failed", "BAD RESPONSE!", None
+
+    data = entities.get(wd_id)
+    if data is not None:
+        return "ok", "", shorten(best_label(data))
+
+    # The requested ID is missing from the answer: report the entity that was
+    # returned in its place as the redirect target.
+    new_id, data = first(entities.items(), (None, None))
+    if data is None:
+        return "failed", "BAD RESPONSE!", None
+    return "redirect", new_id, shorten(best_label(data))
+
+
+def main():
+    """Check every ID from stdin and print the table and the summary."""
+    notfound = []
+    redirects = []
+    failed = []
+    buckets = {"notfound": notfound, "redirect": redirects, "failed": failed}
+
+    try:
+        print(f"{'Wikidata ID':>15} | {'Redirect/issue':>15} | Label")
+        with requests.Session() as s:
+            for wd_id in ids():
+                status, issue, label = lookup(s, wd_id)
+                if status in buckets:
+                    buckets[status].append(wd_id)
+                if label is None:
+                    print(f"{wd_id:>15} | {issue:>15} |\t")
+                else:
+                    print(f"{wd_id:>15} | {issue:>15} | {label}")
+
+    finally:
+        # Runs on Ctrl-C or a crash too, so partial results are still reported.
+        print()
+        print()
+        print(f"Not found: {notfound}")
+        print(f"Redirects: {redirects}")
+        if failed:
+            print(f"Failed: {failed}")
+        print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d354bb2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests
+ipo