2022-02-24 02:33:20 +01:00
|
|
|
/**
|
2022-10-29 03:01:24 +02:00
|
|
|
* Fetches all 'modern languages' from wikidata, then exports their names in every language.
|
|
|
|
* Some meta-info (e.g. RTL) is exported too
|
2022-02-24 02:33:20 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
import * as wds from "wikidata-sdk"
|
2022-09-08 21:40:48 +02:00
|
|
|
import { Utils } from "../Utils"
|
|
|
|
import ScriptUtils from "./ScriptUtils"
|
|
|
|
import { existsSync, readFileSync, writeFileSync } from "fs"
|
|
|
|
import { QuestionableTagRenderingConfigJson } from "../Models/ThemeConfig/Json/QuestionableTagRenderingConfigJson"
|
|
|
|
import { LayerConfigJson } from "../Models/ThemeConfig/Json/LayerConfigJson"
|
|
|
|
import WikidataUtils from "../Utils/WikidataUtils"
|
|
|
|
import LanguageUtils from "../Utils/LanguageUtils"
|
|
|
|
|
|
|
|
/**
 * Downloads the language data and stores it, JSON-encoded, in the given file.
 *
 * @param target path of the file to (over)write
 */
async function fetch(target: string) {
    const languages = await fetchRegularLanguages()
    const serialized = JSON.stringify(languages, null, " ")
    writeFileSync(target, serialized)
    console.log(`Written to ${target}`)
}
|
|
|
|
|
|
|
|
/**
 * Queries Wikidata for every item that is an instance of 'modern language' and
 * collects, per language: its labels, its Wikimedia language code and the
 * directionality label of its writing system.
 *
 * Four extra items are fetched one by one with `fetchSpecial` and appended to the
 * result under a fixed code: `zh_Hant`, `zh_Hans`, `pt_BR` and `fil`.
 * NOTE(review): presumably these are not matched by the main query — confirm.
 *
 * @returns the SPARQL result bindings of the main query, extended with the extra items
 */
async function fetchRegularLanguages() {
    console.log("Fetching languages")

    const sparql =
        "SELECT ?lang ?label ?code ?directionalityLabel \n" +
        "WHERE \n" +
        "{ \n" +
        " ?lang wdt:P31 wd:Q1288568. \n" + // language instanceOf (p31) modern language(Q1288568)
        " ?lang rdfs:label ?label. \n" +
        " ?lang wdt:P282 ?writing_system. \n" +
        " ?writing_system wdt:P1406 ?directionality. \n" +
        " ?lang wdt:P424 ?code. \n" + // Wikimedia language code seems to be close to the weblate entries
        ' SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } \n' +
        "} "
    const url = wds.sparqlQuery(sparql)

    // request the generated URL with your favorite HTTP request library
    const result = await Utils.downloadJson(url, { "User-Agent": "MapComplete script" })
    const bindings = result.results.bindings

    const zh_hant = await fetchSpecial(18130932, "zh_Hant")
    // Must be tagged 'zh_Hans': tagging it 'zh_Hant' would mix these labels into the
    // zh_Hant entries and leave 'zh_Hans' without any data.
    const zh_hans = await fetchSpecial(13414913, "zh_Hans")
    const pt_br = await fetchSpecial(750553, "pt_BR")
    const fil = await fetchSpecial(33298, "fil")

    bindings.push(...zh_hant)
    bindings.push(...zh_hans)
    bindings.push(...pt_br)
    bindings.push(...fil)

    return bindings
}
|
|
|
|
|
|
|
|
/**
 * Fetches every label of one single Wikidata item and marks each returned
 * binding with the given language code.
 *
 * @param id numeric part of the Wikidata item id (the digits after 'Q')
 * @param code language code which is stored as `code.value` on every binding
 * @returns the result bindings, each one carrying the given code
 */
async function fetchSpecial(id: number, code: string) {
    ScriptUtils.fixUtils()
    console.log("Fetching languages")

    const queryLines = [
        "SELECT ?lang ?label ?code ",
        "WHERE ",
        "{ ",
        ` wd:Q${id} rdfs:label ?label. `,
        "} ",
    ]
    const requestUrl = wds.sparqlQuery(queryLines.join("\n"))

    const response = await Utils.downloadJson(requestUrl, { "User-Agent": "MapComplete script" })
    const labelBindings = response.results.bindings
    for (const labelBinding of labelBindings) {
        labelBinding["code"] = { value: code }
    }
    return labelBindings
}
|
|
|
|
|
2022-10-29 03:01:24 +02:00
|
|
|
/**
 * Builds an object which maps a language code onto the name of that language
 * written in the language itself.
 * Only codes contained in `LanguageUtils.usedLanguages` are kept; the codes are
 * handled in sorted order. A message is logged for every kept language which has
 * no translation into itself (its entry is then `undefined`).
 *
 * @param langs per language code, the translations of the language name
 * @returns the mapping 'language code' --> 'native name'
 */
function getNativeList(langs: Map<string, { translations: Map<string, string> }>) {
    const native = {}
    const sortedCodes: string[] = Array.from(langs.keys()).sort()
    const displayedCodes = sortedCodes.filter((code) => LanguageUtils.usedLanguages.has(code))

    for (const code of displayedCodes) {
        const nativeName = langs.get(code).translations.get(code)
        native[code] = nativeName
        if (nativeName === undefined) {
            console.log("No native translation found for " + code)
        }
    }
    return native
}
|
|
|
|
|
2022-09-08 21:40:48 +02:00
|
|
|
/**
 * Asks Wikidata which languages are listed for every country and groups the
 * returned language codes by country code.
 * A language code is appended once per result row, so the lists may contain duplicates.
 *
 * @returns a map 'country code' --> 'language codes'
 */
async function getOfficialLanguagesPerCountry(): Promise<Map<string, string[]>> {
    const perCountry = new Map<string, string[]>()
    const query = `SELECT ?country ?countryLabel ?countryCode ?language ?languageCode ?languageLabel
WHERE
{
?country wdt:P31/wdt:P279* wd:Q3624078;
wdt:P297 ?countryCode;
wdt:P37 ?language.
?language wdt:P218 ?languageCode.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}`
    const requestUrl = wds.sparqlQuery(query)

    const response = await Utils.downloadJson(requestUrl, { "User-Agent": "MapComplete script" })
    const rows: { countryCode: { value: string }; languageCode: { value: string } }[] =
        response.results.bindings

    rows.forEach((row) => {
        const country = row.countryCode.value
        const languageCode = row.languageCode.value
        // Start a fresh list the first time a country is encountered
        if (perCountry.get(country) === undefined) {
            perCountry.set(country, [])
        }
        perCountry.get(country).push(languageCode)
    })
    return perCountry
}
|
|
|
|
|
2022-11-02 13:47:34 +01:00
|
|
|
/**
 * Gives the language codes per country code, using a JSON file on disk as cache.
 *
 * If the cache file exists and `wipeCache` is not set, the file is parsed and returned.
 * Otherwise the data is fetched with `getOfficialLanguagesPerCountry`, written to
 * the cache file and returned.
 *
 * @param wipeCache if true, the cache file is ignored and regenerated
 * @returns an object 'country code' --> 'language codes'
 */
async function getOfficialLanguagesPerCountryCached(
    wipeCache: boolean
): Promise<Record<string /*Country code*/, string[] /*Language codes*/>> {
    // One single path for the existence check, the read and the write:
    // checking a differently spelled path means the cache can never be hit.
    const officialLanguagesPath = "./assets/language_in_country.json"

    if (!wipeCache && existsSync(officialLanguagesPath)) {
        return JSON.parse(readFileSync(officialLanguagesPath, "utf8"))
    }

    const officialLanguages: Record<string, string[]> = Utils.MapToObj(
        await getOfficialLanguagesPerCountry(),
        (t) => t
    )
    writeFileSync(officialLanguagesPath, JSON.stringify(officialLanguages, null, " "))
    return officialLanguages
}
|
|
|
|
|
2022-09-08 21:40:48 +02:00
|
|
|
/**
 * Generates the language assets:
 * - `./assets/language_native.json`: the result of `getNativeList`
 * - `./assets/language_translations.json`: per language id, the translations of its
 *   name plus a `_meta` entry holding the countries and the directionality.
 *
 * The downloaded Wikidata dump is kept in `./assets/generated/languages-wd.json`
 * and is only downloaded again if that file is missing or if `wipeCache` is set.
 *
 * @param wipeCache if true, all cached data is downloaded again
 */
async function main(wipeCache = false) {
    const cacheFile = "./assets/generated/languages-wd.json"
    const cacheIsUsable = !wipeCache && existsSync(cacheFile)
    if (cacheIsUsable) {
        console.log("Reusing the cached file")
    } else {
        console.log("Refreshing cache")
        await fetch(cacheFile)
    }

    const rawBindings = JSON.parse(readFileSync(cacheFile, { encoding: "utf8" }))
    const perId = WikidataUtils.extractLanguageData(rawBindings, WikidataUtils.languageRemapping)

    const nativeNames = JSON.stringify(getNativeList(perId), null, " ")
    writeFileSync("./assets/language_native.json", nativeNames)

    const officialLanguages = await getOfficialLanguagesPerCountryCached(wipeCache)
    // NOTE(review): presumably TransposeMap yields 'language' --> 'countries'; it is indexed by language id below
    const languagesPerCountry = Utils.TransposeMap(officialLanguages)

    const translations = Utils.MapToObj(perId, (value, key) => {
        // Every language code stays in the list, but a translation is only kept
        // if its target language is one of the displayed languages
        const translatedForId: Record<string, string | { countries?: string[]; dir: string[] }> =
            Utils.MapToObj(value.translations, (v, k) =>
                LanguageUtils.usedLanguages.has(k) ? v : undefined
            )

        const countries = Utils.Dedup(languagesPerCountry[key])
        translatedForId["_meta"] = { countries, dir: value.directionality }
        return translatedForId
    })

    const serializedTranslations = JSON.stringify(translations, null, " ")
    writeFileSync("./assets/language_translations.json", serializedTranslations)
}
|
|
|
|
|
2022-02-25 01:15:16 +01:00
|
|
|
// Script entry point: the caches are regenerated if the first command line
// argument is '--force-refresh'
const [, , firstCliArgument] = process.argv
const forceRefresh = firstCliArgument === "--force-refresh"
ScriptUtils.fixUtils()
main(forceRefresh).then(() => {
    console.log("Done!")
})
|