2023-07-15 18:04:30 +02:00
import known_layers from "../src/assets/generated/known_layers.json"
import { LayerConfigJson } from "../src/Models/ThemeConfig/Json/LayerConfigJson"
import { TagUtils } from "../src/Logic/Tags/TagUtils"
import { Utils } from "../src/Utils"
2024-05-13 17:21:40 +02:00
import { existsSync , readFileSync , writeFileSync } from "fs"
2022-03-13 01:27:19 +01:00
import ScriptUtils from "./ScriptUtils"
2023-12-04 16:10:05 +01:00
import TagRenderingConfig from "../src/Models/ThemeConfig/TagRenderingConfig"
import { And } from "../src/Logic/Tags/And"
2024-05-13 17:21:40 +02:00
import Script from "./Script"
2024-05-16 00:12:50 +02:00
import NameSuggestionIndex from "../src/Logic/Web/NameSuggestionIndex"
2024-06-01 12:48:22 +02:00
import TagInfo from "../src/Logic/Web/TagInfo"
2022-03-13 01:27:19 +01:00
2024-05-13 17:21:40 +02:00
/**
 * Generic helpers which are only needed by this script.
 */
class Utilities {
    /**
     * Creates a new record which has the same keys as `record`, but where every value `v`
     * is replaced by `f(v)`.
     *
     * Only the own enumerable keys of `record` are visited: enumerable properties that are
     * merely inherited through the prototype chain do not end up in the result.
     * The given record itself is not modified.
     *
     * @param record the record of which the values are transformed
     * @param f the transformation which is applied onto every value
     * @returns a fresh record, mapping every own key of `record` onto its transformed value
     */
    static mapValues<X extends string | number, T, TOut>(
        record: Record<X, T>,
        f: (t: T) => TOut
    ): Record<X, TOut> {
        const newR = <Record<X, TOut>>{}
        for (const x in record) {
            // `for ... in` also walks the prototype chain; skip everything that is not an own key
            if (!Object.prototype.hasOwnProperty.call(record, x)) {
                continue
            }
            newR[x] = f(record[x])
        }
        return newR
    }
}
2024-05-23 04:42:26 +02:00
2024-05-13 17:21:40 +02:00
/**
 * Script which downloads usage statistics from taginfo and writes them to disk.
 *
 * Two kinds of output are generated:
 *  1. `./src/assets/key_totals.json`: how often the keys and tags used by the known layers occur
 *  2. per type ("operator", "brand") the per-country counts of every Name Suggestion Index entry,
 *     together with summarized and per-country files
 */
class GenerateStats extends Script {
    /**
     * Collects every key (and, if `includeTags` is set, every key=value tag) which is used by
     * the source and the title of the known layers, requests for each of them the "all" count
     * from taginfo and writes the totals to `./src/assets/key_totals.json`.
     *
     * @param includeTags if false, only totals per key are collected and no totals per tag
     */
    async createOptimizationFile(includeTags = true) {
        ScriptUtils.fixUtils()
        const layers = <LayerConfigJson[]>known_layers.layers

        // Maps every used key onto the values it is used with; the set stays empty for keys
        // of which only the key itself is of interest
        const keysAndTags = new Map<string, Set<string>>()
        const valuesOf = (key: string): Set<string> => {
            let values = keysAndTags.get(key)
            if (values === undefined) {
                values = new Set<string>()
                keysAndTags.set(key, values)
            }
            return values
        }

        for (const layer of layers) {
            // The source has to be validated _before_ it is indexed:
            // indexing a missing source would throw instead of skipping the layer
            if (layer.source == null || typeof layer.source === "string") {
                continue
            }
            // Layers which load their data from a geojson are skipped, unless flagged as OSM cache
            if (layer.source["geoJson"] !== undefined && !layer.source["isOsmCache"]) {
                continue
            }

            const sourcesList = [TagUtils.Tag(layer.source["osmTags"])]
            if (layer.title) {
                sourcesList.push(...new TagRenderingConfig(layer.title).usedTags())
            }

            const sources = new And(sourcesList)
            for (const key of sources.usedKeys()) {
                // Registers the key, even if no specific value is tracked for it
                valuesOf(key)
            }
            const allTags = includeTags ? sources.usedTags() : []
            for (const tag of allTags) {
                valuesOf(tag.key).add(tag.value)
            }
        }

        const keyTotal = new Map<string, number>()
        const tagTotal = new Map<string, Map<string, number>>()
        await Promise.all(
            Array.from(keysAndTags.entries()).map(async ([key, values]) => {
                const data = await TagInfo.global.getStats(key)
                const count = data.data.find((item) => item.type === "all").count
                keyTotal.set(key, count)
                console.log(key, "-->", count)
                if (values.size === 0) {
                    return
                }
                const totalsForKey = new Map<string, number>()
                tagTotal.set(key, totalsForKey)
                await Promise.all(
                    Array.from(values).map(async (value) => {
                        const tagData = await TagInfo.global.getStats(key, value)
                        const count = tagData.data.find((item) => item.type === "all").count
                        totalsForKey.set(value, count)
                        console.log(key + "=" + value, "-->", count)
                    })
                )
            })
        )

        writeFileSync(
            "./src/assets/key_totals.json",
            JSON.stringify(
                {
                    "#": "Generated with generateStats.ts",
                    date: new Date().toISOString(),
                    keys: Utils.MapToObj(keyTotal, (t) => t),
                    tags: Utils.MapToObj(tagTotal, (v) => Utils.MapToObj(v, (t) => t)),
                },
                null,
                " "
            )
        )
    }

    /**
     * Reads a file mapping `brand --> country --> count` and derives two kinds of files from it:
     *  - `<pathNoExtension>.summarized.json`: the same data, without the entries where the count is zero
     *  - `<pathNoExtension>.<country>.json`: for every country, the brands with a count above two
     *
     * @param sourcefile path of the JSON file to read
     * @param pathNoExtension prefix of all the files which are written
     */
    private summarizeNSI(sourcefile: string, pathNoExtension: string): void {
        const data = <Record<string, Record<string, number>>>(
            JSON.parse(readFileSync(sourcefile, "utf8"))
        )
        const allCountries: Set<string> = new Set()
        for (const brand in data) {
            const perCountry = data[brand]
            for (const country in perCountry) {
                // The country is registered, even if its count turns out to be zero
                allCountries.add(country)
                if (perCountry[country] === 0) {
                    delete perCountry[country]
                }
            }
        }
        const pathOut = pathNoExtension + ".summarized.json"
        writeFileSync(pathOut, JSON.stringify(data, null, " "), "utf8")
        console.log("Written", pathOut)

        const allBrands = Object.keys(data)
        allBrands.sort()
        for (const country of allCountries) {
            const summary = <Record<string, number>>{}
            for (const brand of allBrands) {
                const count = data[brand][country]
                // Only brands which occur more than twice in this country are kept:
                // a count of one or two is rather exceptional and is ignored.
                // A brand without entry for this country yields `undefined` and is dropped as well.
                if (count > 2) {
                    summary[brand] = count
                }
            }
            const countryPath = pathNoExtension + "." + country + ".json"
            writeFileSync(countryPath, JSON.stringify(summary), "utf8")
            console.log("Written", countryPath)
        }
    }

    /**
     * Creates or completes `<basepath><type>.json`, which maps every value that the Name Suggestion
     * Index knows for `type` onto the distribution as returned by taginfo (one "all" count per entry).
     *
     * Values which already have a usable entry in an existing file are not downloaded again.
     * The missing values are downloaded in batches; the file is rewritten after every batch,
     * so that an interrupted run can pick up where it left off.
     *
     * @param basepath directory (including the trailing slash) in which the file lives
     * @param type the key for which the values are looked up, e.g. "brand" or "operator"
     */
    async createNameSuggestionIndexFile(basepath: string, type: "brand" | "operator" | string) {
        const path = basepath + type + ".json"
        let allBrands = <Record<string, Record<string, number>>>{}
        if (existsSync(path)) {
            allBrands = JSON.parse(readFileSync(path, "utf8"))
            console.log(
                "Loaded",
                Object.keys(allBrands).length,
                " previously loaded " + type,
                "from",
                path
            )
        }
        let skipped = 0
        const allBrandNames: string[] = Utils.Dedup(
            NameSuggestionIndex.allPossible(type).map((item) => item.tags[type])
        )
        const missingBrandNames: string[] = []
        for (let i = 0; i < allBrandNames.length; i++) {
            const brand = allBrandNames[i]
            if (!!allBrands[brand] && Object.keys(allBrands[brand]).length === 0) {
                // An entry without any count is useless: drop it, it will be downloaded again
                delete allBrands[brand]
                console.log("Deleted", brand, "as no entries at all")
            }
            if (allBrands[brand] !== undefined) {
                const max = Math.max(...Object.values(allBrands[brand]))
                skipped++
                if (skipped % 100 === 0) {
                    console.warn("Busy; ", i + "/" + allBrandNames.length, "; skipped", skipped)
                }
                if (max >= 0) {
                    // Already known, no need to download this one again
                    continue
                }
                // Only negative counts: the entry is suspicious and is downloaded again
                console.log("HMMMM:", allBrands[brand])
                delete allBrands[brand]
            }
            missingBrandNames.push(brand)
        }

        const batchSize = 101
        for (let i = 0; i < missingBrandNames.length; i += batchSize) {
            // The last batch is probably shorter than batchSize: only take the names which
            // actually exist, otherwise `undefined` would be queried and stored as a brand
            const batch = missingBrandNames.slice(i, i + batchSize)
            console.warn(
                "Downloading",
                batch.length,
                "items: ",
                i + "/" + missingBrandNames.length,
                "; skipped",
                skipped,
                "total:",
                allBrandNames.length
            )

            const distributions = await Promise.all(
                batch.map(async (brand, j) => {
                    // Staggers the start of the requests within a batch
                    // (the j-th request waits j * 250, presumably milliseconds)
                    await ScriptUtils.sleep(j * 250)
                    return TagInfo.getGlobalDistributionsFor(type, brand)
                })
            )
            for (let j = 0; j < batch.length; j++) {
                const distribution: Record<string, number> = Utilities.mapValues(
                    distributions[j],
                    (s) => s.data.find((t) => t.type === "all").count
                )
                allBrands[batch[j]] = distribution
            }
            // Checkpoint: save the progress after every batch
            writeFileSync(path, JSON.stringify(allBrands), "utf8")
            console.log("Checkpointed", path)
        }
        writeFileSync(path, JSON.stringify(allBrands), "utf8")
    }

    constructor() {
        super(
            "Downloads stats on osmSource-tags and keys from tagInfo. There are two usecases with separate outputs:\n 1. To optimize the query before sending it to overpass (generates ./src/assets/key_totals.json) \n 2. To amend the Name Suggestion Index "
        )
    }

    /**
     * Generates the key totals first; then, for "operator" and "brand", the Name Suggestion Index
     * statistics followed by their summaries.
     *
     * @param _ the command line arguments, which are not used
     */
    async main(_: string[]) {
        const basepath = "./src/assets/generated/stats/"
        await this.createOptimizationFile()
        for (const type of ["operator", "brand"]) {
            await this.createNameSuggestionIndexFile(basepath, type)
            this.summarizeNSI(basepath + type + ".json", "./public/assets/data/nsi/stats/" + type)
        }
    }
}
2024-05-13 17:21:40 +02:00
// Script entry point: creates the script object and starts it through `run()`.
// `run()` is inherited from Script, which is not visible here; presumably it ends up calling main() - TODO confirm
new GenerateStats ( ) . run ( )