Make generateStats script more performant

This commit is contained in:
Pieter Vander Vennet 2024-05-30 20:06:24 +02:00
parent a37ffb5523
commit 71380aa557
2 changed files with 24 additions and 12 deletions

View file

@ -178,7 +178,7 @@ export default class ScriptUtils {
if(!headers.Accept){ if(!headers.Accept){
headers.accept ??= "application/json" headers.accept ??= "application/json"
} }
console.log(" > ScriptUtils.Download(", url, ")") ScriptUtils.erasableLog(" > ScriptUtils.Download(", url, ")")
const urlObj = new URL(url) const urlObj = new URL(url)
const request = https.get( const request = https.get(
{ {

View file

@ -140,15 +140,12 @@ class GenerateStats extends Script {
let allBrands = <Record<string, Record<string, number>>>{} let allBrands = <Record<string, Record<string, number>>>{}
if (existsSync(path)) { if (existsSync(path)) {
allBrands = JSON.parse(readFileSync(path, "utf8")) allBrands = JSON.parse(readFileSync(path, "utf8"))
console.log("Loaded", Object.keys(allBrands).length, " previously loaded brands") console.log("Loaded", Object.keys(allBrands).length, " previously loaded " + type,"from",path)
} }
const lastWrite = new Date()
let skipped = 0 let skipped = 0
const allBrandNames: string[] = Utils.Dedup(NameSuggestionIndex.allPossible(type).map(item => item.tags[type])) const allBrandNames: string[] = Utils.Dedup(NameSuggestionIndex.allPossible(type).map(item => item.tags[type]))
const missingBrandNames : string[] = []
for (let i = 0; i < allBrandNames.length; i++) { for (let i = 0; i < allBrandNames.length; i++) {
if (i % 100 == 0) {
console.log("Downloading ", i + "/" + allBrandNames.length, "; skipped", skipped)
}
const brand = allBrandNames[i] const brand = allBrandNames[i]
if (!!allBrands[brand] && Object.keys(allBrands[brand]).length == 0) { if (!!allBrands[brand] && Object.keys(allBrands[brand]).length == 0) {
delete allBrands[brand] delete allBrands[brand]
@ -157,20 +154,35 @@ class GenerateStats extends Script {
if (allBrands[brand] !== undefined) { if (allBrands[brand] !== undefined) {
const max = Math.max(...Object.values(allBrands[brand])) const max = Math.max(...Object.values(allBrands[brand]))
skipped++ skipped++
if (skipped % 100 == 0) {
console.warn("Busy; ", i + "/" + allBrandNames.length, "; skipped", skipped)
}
if (max < 0) { if (max < 0) {
console.log("HMMMM:", allBrands[brand]) console.log("HMMMM:", allBrands[brand])
delete allBrands[brand] delete allBrands[brand]
} else { } else {
continue continue
} }
} }
const distribution: Record<string, number> = Utilities.mapValues(await TagInfo.getGlobalDistributionsFor(type, brand), s => s.data.find(t => t.type === "all").count) missingBrandNames.push(brand)
allBrands[brand] = distribution
if ((new Date().getTime() - lastWrite.getTime()) / 1000 >= 5) { }
writeFileSync(path, JSON.stringify(allBrands), "utf8") const batchSize = 101
console.log("Checkpointed", path) for (let i = 0; i < missingBrandNames.length; i += batchSize) {
console.warn("Downloading",batchSize,"items: ", i + "/" + (missingBrandNames.length), "; skipped", skipped, "total:",allBrandNames.length)
const distributions = await Promise.all(Utils.TimesT(batchSize, async j => {
await ScriptUtils.sleep(j * 250)
return TagInfo.getGlobalDistributionsFor(type, missingBrandNames[i + j])
}))
for (let j = 0; j < distributions.length; j++) {
const brand = missingBrandNames[i + j]
const distribution: Record<string, number> = Utilities.mapValues(distributions[j], s => s.data.find(t => t.type === "all").count)
allBrands[brand] = distribution
} }
writeFileSync(path, JSON.stringify(allBrands), "utf8")
console.log("Checkpointed", path)
} }
writeFileSync(path, JSON.stringify(allBrands), "utf8") writeFileSync(path, JSON.stringify(allBrands), "utf8")
} }