229 lines
8.7 KiB
TypeScript
229 lines
8.7 KiB
TypeScript
import known_layers from "../src/assets/generated/known_layers.json"
|
|
import { LayerConfigJson } from "../src/Models/ThemeConfig/Json/LayerConfigJson"
|
|
import { TagUtils } from "../src/Logic/Tags/TagUtils"
|
|
import { Utils } from "../src/Utils"
|
|
import { existsSync, readFileSync, writeFileSync } from "fs"
|
|
import ScriptUtils from "./ScriptUtils"
|
|
import TagRenderingConfig from "../src/Models/ThemeConfig/TagRenderingConfig"
|
|
import { And } from "../src/Logic/Tags/And"
|
|
import Script from "./Script"
|
|
import NameSuggestionIndex from "../src/Logic/Web/NameSuggestionIndex"
|
|
import TagInfo from "../src/Logic/Web/TagInfo"
|
|
|
|
/**
 * Generic helper functions used by this script.
 */
class Utilities {
    /**
     * Creates a new record with the same keys as `record`, in which every value
     * has been converted by `f`. The converted values are stored in a fresh object;
     * this method itself never assigns to `record`.
     *
     * @param record the record whose values should be converted
     * @param f the conversion applied to every value
     * @returns a new record mapping every enumerated key of `record` onto `f(value)`
     */
    static mapValues<X extends string | number, T, TOut>(
        record: Record<X, T>,
        f: (t: T) => TOut
    ): Record<X, TOut> {
        const converted = {} as Record<X, TOut>
        for (const key in record) {
            const value = record[key]
            converted[key] = f(value)
        }
        return converted
    }
}
|
|
|
|
/**
 * Script which downloads usage statistics from TagInfo and stores them as JSON files.
 *
 * Two independent outputs are produced:
 *  1. `./src/assets/key_totals.json`: the global count of every key (and tag) used by the known layers
 *  2. Per-type (`operator`, `brand`) files with the per-country counts of the Name Suggestion Index entries,
 *     plus summarized variants of those files
 */
class GenerateStats extends Script {
    /**
     * Collects the keys (and optionally the tags) used by the known layers, fetches the count of each
     * of them from TagInfo and writes the result to `./src/assets/key_totals.json`.
     *
     * @param includeTags if `false`, only the counts per key are downloaded and no per-tag counts
     */
    async createOptimizationFile(includeTags = true): Promise<void> {
        // NOTE(review): presumably prepares `Utils` for usage from a script context - confirm in ScriptUtils
        ScriptUtils.fixUtils()
        const layers = <LayerConfigJson[]>known_layers.layers

        // Maps every used key onto the set of values it is used with (the set is empty for key-only usage)
        const keysAndTags = new Map<string, Set<string>>()

        for (const layer of layers) {
            // This guard must run first: the checks below index into `layer.source`,
            // which throws if the source is null or undefined
            if (layer.source == null || typeof layer.source === "string") {
                continue
            }
            // Layers fed by a geoJson-source are skipped, unless that source is flagged as `isOsmCache`
            if (layer.source["geoJson"] !== undefined && !layer.source["isOsmCache"]) {
                continue
            }

            const sourcesList = [TagUtils.Tag(layer.source["osmTags"])]
            if (layer.title) {
                sourcesList.push(...new TagRenderingConfig(layer.title).usedTags())
            }

            const sources = new And(sourcesList)
            for (const key of sources.usedKeys()) {
                if (!keysAndTags.has(key)) {
                    keysAndTags.set(key, new Set<string>())
                }
            }
            const allTags = includeTags ? sources.usedTags() : []
            for (const tag of allTags) {
                let values = keysAndTags.get(tag.key)
                if (values === undefined) {
                    values = new Set<string>()
                    keysAndTags.set(tag.key, values)
                }
                values.add(tag.value)
            }
        }

        const keyTotal = new Map<string, number>()
        const tagTotal = new Map<string, Map<string, number>>()
        // All requests (for every key and every value) are started at once and awaited together
        await Promise.all(
            Array.from(keysAndTags).map(async ([key, values]) => {
                const data = await TagInfo.global.getStats(key)
                // Only the entry with type "all" is used
                const count = data.data.find((item) => item.type === "all").count
                keyTotal.set(key, count)
                console.log(key, "-->", count)

                if (values.size > 0) {
                    const totalsPerValue = new Map<string, number>()
                    tagTotal.set(key, totalsPerValue)
                    await Promise.all(
                        Array.from(values).map(async (value) => {
                            const tagData = await TagInfo.global.getStats(key, value)
                            const count = tagData.data.find((item) => item.type === "all").count
                            totalsPerValue.set(value, count)
                            console.log(key + "=" + value, "-->", count)
                        })
                    )
                }
            })
        )
        writeFileSync(
            "./src/assets/key_totals.json",
            JSON.stringify(
                {
                    "#": "Generated with generateStats.ts",
                    date: new Date().toISOString(),
                    keys: Utils.MapToObj(keyTotal, (t) => t),
                    tags: Utils.MapToObj(tagTotal, (v) => Utils.MapToObj(v, (t) => t)),
                },
                null,
                " "
            )
        )
    }

    /**
     * Reads a `brand -> country -> count` file and writes summarized variants of it:
     *  - `<pathNoExtension>.summarized.json`: the same data with all zero-counts removed
     *  - `<pathNoExtension>.<country>.json`: one file per encountered country, mapping brand onto count
     *
     * @param sourcefile path of the JSON-file to summarize
     * @param pathNoExtension output path prefix to which the suffixes above are appended
     */
    private summarizeNSI(sourcefile: string, pathNoExtension: string): void {
        const data = <Record<string, Record<string, number>>>(
            JSON.parse(readFileSync(sourcefile, "utf8"))
        )

        const allCountries = new Set<string>()
        for (const brand in data) {
            const perCountry = data[brand]
            for (const country in perCountry) {
                // The country is registered before the zero-check, so a country with
                // only zero-counts still gets its own (empty) file below
                allCountries.add(country)
                if (perCountry[country] === 0) {
                    delete perCountry[country]
                }
            }
        }

        const pathOut = pathNoExtension + ".summarized.json"
        writeFileSync(pathOut, JSON.stringify(data, null, " "), "utf8")
        console.log("Written", pathOut)

        const allBrands = Object.keys(data)
        allBrands.sort()
        for (const country of allCountries) {
            const summary = <Record<string, number>>{}
            for (const brand of allBrands) {
                const count = data[brand][country]
                // Only brands occurring more than twice in this country are kept, as very low counts
                // are rather exceptional (original Dutch note: "Eéntje is geentje", roughly "one is none").
                // NOTE(review): the previous comment only mentioned count == 1, but this threshold
                // also drops a count of 2 - confirm which one is intended
                if (count > 2) {
                    summary[brand] = count
                }
            }

            const countryPath = pathNoExtension + "." + country + ".json"
            writeFileSync(countryPath, JSON.stringify(summary), "utf8")
            console.log("Written", countryPath)
        }
    }

    /**
     * Downloads, for every value of `type` known to the Name Suggestion Index, its distribution from TagInfo
     * and stores the counts in `<basepath><type>.json`.
     *
     * Entries which are already present in that file are not downloaded again. The file is rewritten
     * after every batch, so that an interrupted run can continue where it stopped.
     *
     * @param basepath directory prefix (including trailing separator) of the output file
     * @param type the key to generate the file for, e.g. `brand` or `operator`
     */
    async createNameSuggestionIndexFile(
        basepath: string,
        type: "brand" | "operator" | string
    ): Promise<void> {
        const path = basepath + type + ".json"
        let allBrands = <Record<string, Record<string, number>>>{}
        if (existsSync(path)) {
            allBrands = JSON.parse(readFileSync(path, "utf8"))
            console.log(
                "Loaded",
                Object.keys(allBrands).length,
                " previously loaded " + type,
                "from",
                path
            )
        }
        let skipped = 0
        const allBrandNames: string[] = Utils.Dedup(
            NameSuggestionIndex.allPossible(type).map((item) => item.tags[type])
        )
        const missingBrandNames: string[] = []
        for (let i = 0; i < allBrandNames.length; i++) {
            const brand = allBrandNames[i]
            // An entry without any count at all is dropped, which causes it to be downloaded again
            if (!!allBrands[brand] && Object.keys(allBrands[brand]).length === 0) {
                delete allBrands[brand]
                console.log("Deleted", brand, "as no entries at all")
            }
            if (allBrands[brand] !== undefined) {
                const max = Math.max(...Object.values(allBrands[brand]))
                skipped++
                if (skipped % 100 === 0) {
                    console.warn("Busy; ", i + "/" + allBrandNames.length, "; skipped", skipped)
                }
                if (max < 0) {
                    // Only negative counts: the stored entry is discarded and downloaded again
                    console.log("HMMMM:", allBrands[brand])
                    delete allBrands[brand]
                } else {
                    continue
                }
            }
            missingBrandNames.push(brand)
        }

        const batchSize = 101
        for (let i = 0; i < missingBrandNames.length; i += batchSize) {
            // The last batch can be smaller than `batchSize`. Slicing keeps the indices within bounds,
            // so that no request is made for (and no entry is stored under) an `undefined` name
            const batch = missingBrandNames.slice(i, i + batchSize)
            console.warn(
                "Downloading",
                batch.length,
                "items: ",
                i + "/" + missingBrandNames.length,
                "; skipped",
                skipped,
                "total:",
                allBrandNames.length
            )

            const distributions = await Promise.all(
                batch.map(async (brand, j) => {
                    // Every request of the batch starts 250ms after the previous one
                    await ScriptUtils.sleep(j * 250)
                    return TagInfo.getGlobalDistributionsFor(type, brand)
                })
            )
            for (let j = 0; j < distributions.length; j++) {
                const brand = batch[j]
                // Only the count of the entry with type "all" is kept for every key of the distribution
                const distribution: Record<string, number> = Utilities.mapValues(
                    distributions[j],
                    (s) => s.data.find((t) => t.type === "all").count
                )
                allBrands[brand] = distribution
            }
            writeFileSync(path, JSON.stringify(allBrands), "utf8")
            console.log("Checkpointed", path)
        }
        // Final write: also persists entries removed above when nothing had to be downloaded
        writeFileSync(path, JSON.stringify(allBrands), "utf8")
    }

    /**
     * Passes the description of this script to the `Script` base class.
     */
    constructor() {
        super(
            "Downloads stats on osmSource-tags and keys from tagInfo. There are two usecases with separate outputs:\n 1. To optimize the query before sending it to overpass (generates ./src/assets/key_totals.json) \n 2. To amend the Name Suggestion Index "
        )
    }

    /**
     * Entry point: generates `key_totals.json` first; then creates and summarizes
     * the Name Suggestion Index statistics for `operator` and `brand`.
     *
     * @param _ the script arguments, which are not used
     */
    async main(_: string[]): Promise<void> {
        const basepath = "./src/assets/generated/stats/"
        await this.createOptimizationFile()

        for (const type of ["operator", "brand"]) {
            await this.createNameSuggestionIndexFile(basepath, type)
            this.summarizeNSI(basepath + type + ".json", "./public/assets/data/nsi/stats/" + type)
        }
    }
}
|
|
|
|
// Script entry point: instantiate the generator and start it via `run()`, which is inherited from `Script`
const generateStats = new GenerateStats()
generateStats.run()
|