Make generateStats script more performant

This commit is contained in:
Pieter Vander Vennet 2024-05-30 20:06:24 +02:00
parent a37ffb5523
commit 71380aa557
2 changed files with 24 additions and 12 deletions

View file

@ -178,7 +178,7 @@ export default class ScriptUtils {
if(!headers.Accept){
headers.accept ??= "application/json"
}
console.log(" > ScriptUtils.Download(", url, ")")
ScriptUtils.erasableLog(" > ScriptUtils.Download(", url, ")")
const urlObj = new URL(url)
const request = https.get(
{

View file

@ -140,15 +140,12 @@ class GenerateStats extends Script {
let allBrands = <Record<string, Record<string, number>>>{}
if (existsSync(path)) {
allBrands = JSON.parse(readFileSync(path, "utf8"))
console.log("Loaded", Object.keys(allBrands).length, " previously loaded brands")
console.log("Loaded", Object.keys(allBrands).length, " previously loaded " + type,"from",path)
}
const lastWrite = new Date()
let skipped = 0
const allBrandNames: string[] = Utils.Dedup(NameSuggestionIndex.allPossible(type).map(item => item.tags[type]))
const missingBrandNames : string[] = []
for (let i = 0; i < allBrandNames.length; i++) {
if (i % 100 == 0) {
console.log("Downloading ", i + "/" + allBrandNames.length, "; skipped", skipped)
}
const brand = allBrandNames[i]
if (!!allBrands[brand] && Object.keys(allBrands[brand]).length == 0) {
delete allBrands[brand]
@ -157,20 +154,35 @@ class GenerateStats extends Script {
if (allBrands[brand] !== undefined) {
const max = Math.max(...Object.values(allBrands[brand]))
skipped++
if (skipped % 100 == 0) {
console.warn("Busy; ", i + "/" + allBrandNames.length, "; skipped", skipped)
}
if (max < 0) {
console.log("HMMMM:", allBrands[brand])
delete allBrands[brand]
} else {
continue
}
}
const distribution: Record<string, number> = Utilities.mapValues(await TagInfo.getGlobalDistributionsFor(type, brand), s => s.data.find(t => t.type === "all").count)
allBrands[brand] = distribution
if ((new Date().getTime() - lastWrite.getTime()) / 1000 >= 5) {
writeFileSync(path, JSON.stringify(allBrands), "utf8")
console.log("Checkpointed", path)
missingBrandNames.push(brand)
}
// Download the distributions for all brands that are not cached yet, in batches.
// All requests of one batch run concurrently; the result file is rewritten after
// every batch, so progress is checkpointed on disk.
const batchSize = 101
for (let i = 0; i < missingBrandNames.length; i += batchSize) {
    // The last batch is usually shorter than `batchSize`. `slice` clamps to the end of the
    // array; indexing `missingBrandNames[i + j]` for a full-sized batch would instead yield
    // `undefined`, trigger requests for a non-existent brand and store the results under
    // the key "undefined".
    const batch = missingBrandNames.slice(i, i + batchSize)
    console.warn("Downloading", batch.length, "items: ", i + "/" + (missingBrandNames.length), "; skipped", skipped, "total:", allBrandNames.length)
    const distributions = await Promise.all(batch.map(async (brandName, j) => {
        // Stagger the start of the requests (presumably 250ms apart - confirm the unit of ScriptUtils.sleep)
        await ScriptUtils.sleep(j * 250)
        return TagInfo.getGlobalDistributionsFor(type, brandName)
    }))
    for (let j = 0; j < batch.length; j++) {
        // Per entry of the downloaded distribution, keep only the count of the data item with type "all"
        // NOTE(review): `.find(...)` returns undefined if no such item exists, which would throw here
        const distribution: Record<string, number> = Utilities.mapValues(distributions[j], s => s.data.find(t => t.type === "all").count)
        allBrands[batch[j]] = distribution
    }
    writeFileSync(path, JSON.stringify(allBrands), "utf8")
    console.log("Checkpointed", path)
}
writeFileSync(path, JSON.stringify(allBrands), "utf8")
}