Improve metadata in language translations, remove wikidata-downloading-script

This commit is contained in:
Pieter Vander Vennet 2022-10-29 03:01:24 +02:00
parent cba1a4340e
commit 30a835a232
4 changed files with 66 additions and 197 deletions

View file

@ -132,6 +132,7 @@
"en" "en"
], ],
"CN": [ "CN": [
"zh",
"zh" "zh"
], ],
"CO": [ "CO": [
@ -148,6 +149,7 @@
], ],
"CY": [ "CY": [
"tr", "tr",
"el",
"el" "el"
], ],
"CZ": [ "CZ": [
@ -247,6 +249,9 @@
"es", "es",
"pt" "pt"
], ],
"GR": [
"el"
],
"GT": [ "GT": [
"es" "es"
], ],
@ -453,8 +458,7 @@
"fr" "fr"
], ],
"NG": [ "NG": [
"en", "en"
"yo"
], ],
"NI": [ "NI": [
"es" "es"
@ -502,9 +506,7 @@
"en" "en"
], ],
"PK": [ "PK": [
"ur", "ur"
"en",
"ar"
], ],
"PL": [ "PL": [
"pl", "pl",
@ -559,7 +561,6 @@
"ar" "ar"
], ],
"SE": [ "SE": [
"sv",
"sv" "sv"
], ],
"SG": [ "SG": [
@ -648,6 +649,9 @@
"en", "en",
"en" "en"
], ],
"TW": [
"zh"
],
"TZ": [ "TZ": [
"en", "en",
"sw" "sw"
@ -693,16 +697,16 @@
"ar" "ar"
], ],
"ZA": [ "ZA": [
"en",
"zu",
"xh",
"af", "af",
"ve", "ve",
"ss", "ss",
"tn", "tn",
"ts", "ts",
"st", "st",
"nr", "nr"
"en",
"zu",
"xh"
], ],
"ZM": [ "ZM": [
"en" "en"

View file

@ -54,7 +54,7 @@
"weblate-fix-heavy": "git remote rm weblate-layers; git remote add weblate-layers https://hosted.weblate.org/git/mapcomplete/layers/; git remote update weblate-layers; git merge weblate-layers/master", "weblate-fix-heavy": "git remote rm weblate-layers; git remote add weblate-layers https://hosted.weblate.org/git/mapcomplete/layers/; git remote update weblate-layers; git merge weblate-layers/master",
"housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && npm run format && git add assets/ langs/ Docs/ **/*.ts Docs/* && git commit -m 'Housekeeping...'", "housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && npm run format && git add assets/ langs/ Docs/ **/*.ts Docs/* && git commit -m 'Housekeeping...'",
"parseSchools": "ts-node scripts/schools/amendSchoolData.ts", "parseSchools": "ts-node scripts/schools/amendSchoolData.ts",
"steal": "ts-node scripts/thieves/stealLanguages.ts" "steal": "ts-node scripts/fetchLanguages.ts"
}, },
"keywords": [ "keywords": [
"OpenStreetMap", "OpenStreetMap",

View file

@ -1,5 +1,6 @@
/** /**
* Fetches all 'modern languages' from wikidata, then exports their names in every language * Fetches all 'modern languages' from wikidata, then exports their names in every language.
* Some meta-info (e.g. RTL) is exported too
*/ */
import * as wds from "wikidata-sdk" import * as wds from "wikidata-sdk"
@ -21,12 +22,15 @@ async function fetchRegularLanguages() {
console.log("Fetching languages") console.log("Fetching languages")
const sparql = const sparql =
"SELECT ?lang ?label ?code \n" + "SELECT ?lang ?label ?code ?directionalityLabel \n" +
"WHERE \n" + "WHERE \n" +
"{ \n" + "{ \n" +
" ?lang wdt:P31 wd:Q1288568. \n" + // language instanceOf (p31) modern language(Q1288568) " ?lang wdt:P31 wd:Q1288568. \n" + // language instanceOf (p31) modern language(Q1288568)
" ?lang rdfs:label ?label. \n" + " ?lang rdfs:label ?label. \n" +
" ?lang wdt:P424 ?code" + // Wikimedia language code seems to be close to the weblate entries " ?lang wdt:P282 ?writing_system. \n"+
" ?writing_system wdt:P1406 ?directionality. \n" +
" ?lang wdt:P424 ?code. \n" +// Wikimedia language code seems to be close to the weblate entries
" SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". } \n" +
"} " "} "
const url = wds.sparqlQuery(sparql) const url = wds.sparqlQuery(sparql)
@ -67,16 +71,19 @@ async function fetchSpecial(id: number, code: string) {
return bindings return bindings
} }
function getNativeList(langs: Map<string, Map<string, string>>) { function getNativeList(langs: Map<string, { translations: Map<string, string> }>) {
const native = {} const native = {}
const keys: string[] = Array.from(langs.keys()) const keys: string[] = Array.from(langs.keys())
keys.sort() keys.sort()
for (const key of keys) { for (const key of keys) {
const translations: Map<string, string> = langs.get(key) const translations: Map<string, string> = langs.get(key).translations
if (!LanguageUtils.usedLanguages.has(key)) { if (!LanguageUtils.usedLanguages.has(key)) {
continue continue
} }
native[key] = translations.get(key) native[key] = translations.get(key)
if(native[key] === undefined){
console.log("No native translation found for "+key)
}
} }
return native return native
} }
@ -108,33 +115,7 @@ async function getOfficialLanguagesPerCountry(): Promise<Map<string, string[]>>
return lngs return lngs
} }
async function main(wipeCache = false) { async function getOfficialLanguagesPerCountryCached(wipeCache: boolean): Promise<Record<string /*Country code*/, string[] /*Language codes*/>>{
const cacheFile = "./assets/generated/languages-wd.json"
if (wipeCache || !existsSync(cacheFile)) {
console.log("Refreshing cache")
await fetch(cacheFile)
} else {
console.log("Reusing the cached file")
}
const data = JSON.parse(readFileSync(cacheFile, "UTF8"))
const perId = WikidataUtils.extractLanguageData(data, WikidataUtils.languageRemapping)
const nativeList = getNativeList(perId)
writeFileSync("./assets/language_native.json", JSON.stringify(nativeList, null, " "))
const translations = Utils.MapToObj(perId, (value, key) => {
if (!LanguageUtils.usedLanguages.has(key)) {
return undefined // Remove unused languages
}
return Utils.MapToObj(value, (v, k) => {
if (!LanguageUtils.usedLanguages.has(k)) {
return undefined
}
return v
})
})
writeFileSync("./assets/language_translations.json", JSON.stringify(translations, null, " "))
let officialLanguages: Record<string, string[]> let officialLanguages: Record<string, string[]>
const officialLanguagesPath = "./assets/language_in_country.json" const officialLanguagesPath = "./assets/language_in_country.json"
if (existsSync("./assets/languages_in_country.json") && !wipeCache) { if (existsSync("./assets/languages_in_country.json") && !wipeCache) {
@ -143,37 +124,48 @@ async function main(wipeCache = false) {
officialLanguages = Utils.MapToObj(await getOfficialLanguagesPerCountry(), (t) => t) officialLanguages = Utils.MapToObj(await getOfficialLanguagesPerCountry(), (t) => t)
writeFileSync(officialLanguagesPath, JSON.stringify(officialLanguages, null, " ")) writeFileSync(officialLanguagesPath, JSON.stringify(officialLanguages, null, " "))
} }
return officialLanguages
}
const perLanguage = Utils.TransposeMap(officialLanguages) async function main(wipeCache = false) {
console.log(JSON.stringify(perLanguage, null, " ")) const cacheFile = "./assets/generated/languages-wd.json"
const mappings: { if: string; then: Record<string, string>; hideInAnswer: string }[] = [] if (wipeCache || !existsSync(cacheFile)) {
for (const language of Object.keys(perLanguage)) { console.log("Refreshing cache")
const countries = Utils.Dedup(perLanguage[language].map((c) => c.toLowerCase())) await fetch(cacheFile)
mappings.push({ } else {
if: "language=" + language, console.log("Reusing the cached file")
then: translations[language], }
hideInAnswer: "_country=" + countries.join("|"),
const data = JSON.parse(readFileSync(cacheFile, "UTF8"))
const perId = WikidataUtils.extractLanguageData(data, WikidataUtils.languageRemapping)
const nativeList = getNativeList(perId)
writeFileSync("./assets/language_native.json", JSON.stringify(nativeList, null, " "))
const languagesPerCountry = Utils.TransposeMap(await getOfficialLanguagesPerCountryCached(wipeCache))
const translations = Utils.MapToObj(perId, (value, key) => {
// We keep all language codes in the list...
const translatedForId : Record<string, string | {countries?: string[], dir: string[]}> = Utils.MapToObj(value.translations, (v, k) => {
if (!LanguageUtils.usedLanguages.has(k)) {
// ... but don't keep translations if we don't have a displayed language for them
return undefined
}
return v
}) })
}
const tagRenderings = <QuestionableTagRenderingConfigJson>{ translatedForId["_meta"] = {
id: "official-language", countries : Utils.Dedup( languagesPerCountry[key]),
mappings, dir: value.directionality
question: "What languages are spoken here?", }
}
return translatedForId
})
writeFileSync("./assets/language_translations.json", JSON.stringify(translations, null, " "))
writeFileSync(
"./assets/layers/language/language.json",
JSON.stringify(
<LayerConfigJson>{
id: "language",
description: "Various tagRenderings to help language tooling",
tagRenderings,
},
null,
" "
)
)
} }
const forceRefresh = process.argv[2] === "--force-refresh" const forceRefresh = process.argv[2] === "--force-refresh"

View file

@ -1,127 +0,0 @@
/*
 * Uses the language data downloaded from Wikidata (the translated names of every language)
 * to generate the language-related tagRenderings of the layer 'assets/layers/wikidata/wikidata.json'
 * */
import WikidataUtils from "../../Utils/WikidataUtils"
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"
import { LayerConfigJson } from "../../Models/ThemeConfig/Json/LayerConfigJson"
import { MappingConfigJson } from "../../Models/ThemeConfig/Json/QuestionableTagRenderingConfigJson"
import LanguageUtils from "../../Utils/LanguageUtils"
import * as perCountry from "../../assets/language_in_country.json"
import { Utils } from "../../Utils"
/**
 * Converts the cached language data file into the 'wikidata' layer.
 *
 * Reads 'assets/generated/languages-wd.json', builds three parallel lists of mappings
 * (one keyed on `language:<code>=yes`, one on `school:language=<code>` and one on
 * `tactile_writing:braille:<code>=yes`) and writes the resulting layer configuration
 * to './assets/layers/wikidata/wikidata.json', creating that directory if it is missing.
 */
function main() {
    const sourcepath = "assets/generated/languages-wd.json"
    console.log(`Converting language data file '${sourcepath}' into a tagMapping`)
    const rawLanguageData = JSON.parse(readFileSync(sourcepath, "utf8"))
    const languages = WikidataUtils.extractLanguageData(rawLanguageData, {})

    const mappings: MappingConfigJson[] = []
    const schoolmappings: MappingConfigJson[] = []
    const brailemappings: MappingConfigJson[] = []

    const countryToLanguage: Record<string, string[]> = perCountry
    const officialLanguagesPerCountry = Utils.TransposeMap(countryToLanguage)

    languages.forEach((namesPerLanguage, code) => {
        // Collect one entry per language code that is in use, after applying the remapping table
        const translatedNames: Record<string, string> = {}
        namesPerLanguage.forEach((name, sourceCode) => {
            const languageCodeWeblate = WikidataUtils.languageRemapping[sourceCode] ?? sourceCode
            if (LanguageUtils.usedLanguages.has(languageCodeWeblate)) {
                translatedNames[languageCodeWeblate] = name
            }
        })

        const lowercasedCountries = officialLanguagesPerCountry[code]?.map((s) => s.toLowerCase())
        const officialCountries = Utils.Dedup(lowercasedCountries ?? [])

        // Only build a priority expression if at least one country is known for this code;
        // an undefined value is dropped when the layer is serialized
        let prioritySearch: string | undefined = undefined
        if (officialCountries.length > 0) {
            const alternatives = officialCountries.map((c) => "((^|;)" + c + "($|;))")
            prioritySearch = "_country~" + alternatives.join("|")
        }

        const languageKey = "language:" + code
        mappings.push({
            if: languageKey + "=yes",
            ifnot: languageKey + "=",
            searchTerms: {
                "*": [code],
            },
            then: translatedNames,
            priorityIf: prioritySearch,
        } as MappingConfigJson)

        schoolmappings.push({
            if: "school:language=" + code,
            then: translatedNames,
            priorityIf: prioritySearch,
            searchTerms: {
                "*": [code],
            },
        } as MappingConfigJson)

        const brailleKey = "tactile_writing:braille:" + code
        brailemappings.push({
            if: brailleKey + "=yes",
            ifnot: brailleKey + "=",
            searchTerms: {
                "*": [code],
            },
            then: translatedNames,
            priorityIf: prioritySearch,
        } as MappingConfigJson)
    })

    const wikidataLayer = <LayerConfigJson>{
        id: "wikidata",
        description: {
            en: "Various tagrenderings which are generated from Wikidata. Automatically generated with a script, don't edit manually",
        },
        "#dont-translate": "*",
        source: {
            osmTags: "id~*",
        },
        title: null,
        mapRendering: null,
        tagRenderings: [
            {
                id: "language",
                // @ts-ignore
                description: "Enables to pick *a single* 'language:<lng>=yes' within the mappings",
                mappings,
            },
            {
                builtin: "wikidata.language",
                override: {
                    id: "language-multi",
                    // @ts-ignore
                    description:
                        "Enables to pick *multiple* 'language:<lng>=yes' within the mappings",
                    multiAnswer: true,
                },
            },
            {
                id: "school-language",
                // @ts-ignore
                description: "Enables to pick a single 'school:language=<lng>' within the mappings",
                multiAnswer: true,
                mappings: schoolmappings,
            },
            {
                id: "tactile_writing-braille",
                // @ts-ignore
                description:
                    "Enables to pick *multiple* 'tactile_writing:braille=<lng>' within the mappings",
                multiAnswer: true,
                mappings: brailemappings,
            },
        ],
    }

    const dir = "./assets/layers/wikidata/"
    const directoryMissing = !existsSync(dir)
    if (directoryMissing) {
        mkdirSync(dir)
    }
    const path = dir + "wikidata.json"
    const serializedLayer = JSON.stringify(wikidataLayer, null, " ")
    writeFileSync(path, serializedLayer)
    console.log("Written " + path)
}
// Run the conversion as soon as this script is executed
main()