mapcomplete/Docs/Tools/GenerateSeries.ts

253 lines
8.6 KiB
TypeScript
Raw Normal View History

2022-09-08 21:40:48 +02:00
import { existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from "fs"
import ScriptUtils from "../../scripts/ScriptUtils"
import { Utils } from "../../Utils"
2021-08-22 15:53:05 +02:00
2022-02-14 01:15:20 +01:00
// NOTE(review): presumably prepares the shared `Utils` helpers (e.g. `Utils.downloadJson`, used below)
// for running inside a Node script — confirm in ScriptUtils. Runs once, at module load.
ScriptUtils.fixUtils()
2021-08-22 15:53:05 +02:00
/**
 * Downloads the changesets tagged with `#mapcomplete` from the OSMCha API and caches them on disk.
 *
 * For every handled day a `stats.<year>-<month>-<dd>.day.json` file (a plain list of features) is
 * kept in the target directory. As soon as all days of a month have been handled, they are merged
 * into `stats.<year>-<month>.json` (shaped as `{ features: [...] }`) and the day files are removed.
 */
class StatsDownloader {
    private readonly urlTemplate =
        "https://osmcha.org/api/v1/changesets/?date__gte={start_date}&date__lte={end_date}&page={page}&comment=%23mapcomplete&page_size=100"

    private readonly _targetDirectory: string

    /**
     * @param targetDirectory directory in which the day- and month-files are read and written
     */
    constructor(targetDirectory = ".") {
        this._targetDirectory = targetDirectory
    }

    /**
     * Makes sure that the statistics of every day from the start date up to (but excluding) today
     * are available on disk, downloading whatever is missing.
     *
     * Months for which a month-file already exists are skipped entirely. The month containing
     * today is never merged into a month-file, as it is not finished yet.
     *
     * @param startYear first year to handle
     * @param startMonth first month (1 = january) to handle within `startYear`
     * @param startDay first day to handle within the start month; all later months start at day 1
     * @throws if a day could not be downloaded, even after one retry
     */
    public async DownloadStats(startYear = 2020, startMonth = 5, startDay = 1): Promise<void> {
        const today = new Date()
        const currentYear = today.getFullYear()
        const currentMonth = today.getMonth() + 1

        for (let year = startYear; year <= currentYear; year++) {
            for (let month = 1; month <= 12; month++) {
                if (year === startYear && month < startMonth) {
                    continue
                }
                if (year === currentYear && month > currentMonth) {
                    break
                }

                const pathM = `${this._targetDirectory}/stats.${year}-${month}.json`
                if (existsSync(pathM)) {
                    // This month was completed by an earlier run
                    continue
                }

                // The start day only restricts the very first month, not every month of the start year
                const isStartMonth = year === startYear && month === startMonth
                const firstDay = isStartMonth ? startDay : 1
                const isCurrentMonth = year === currentYear && month === currentMonth

                const features: any[] = []
                let monthIsFinished = true
                const writtenFiles: string[] = []

                for (let day = firstDay; day <= 31; day++) {
                    // Today (and anything later) is not complete yet; '>=' also covers a start day in the future
                    if (isCurrentMonth && day >= today.getDate()) {
                        monthIsFinished = false
                        break
                    }
                    if (new Date(year, month - 1, day).getMonth() !== month - 1) {
                        // The date rolled over into the next month: this month has fewer days
                        break
                    }

                    const path = `${this._targetDirectory}/stats.${year}-${month}-${
                        (day < 10 ? "0" : "") + day
                    }.day.json`
                    writtenFiles.push(path)

                    if (existsSync(path)) {
                        const parsed = JSON.parse(readFileSync(path, "utf-8"))
                        // day-stats are generally a list already, but in some ad-hoc cases might be a geojson-collection too
                        const loadedFeatures: any[] = parsed?.features ?? parsed
                        features.push(...loadedFeatures)
                        console.log(
                            "Loaded ",
                            path,
                            "from disk, got",
                            features.length,
                            "features now"
                        )
                        continue
                    }

                    let dayFeatures: any[]
                    try {
                        dayFeatures = await this.DownloadStatsForDay(year, month, day, path)
                    } catch (e) {
                        console.error(e)
                        console.error(
                            "Could not download " +
                                year +
                                "-" +
                                month +
                                "-" +
                                day +
                                "... Trying again"
                        )
                        // A single retry; a second failure propagates to the caller
                        dayFeatures = await this.DownloadStatsForDay(year, month, day, path)
                    }
                    writeFileSync(path, JSON.stringify(dayFeatures))
                    features.push(...dayFeatures)
                }

                if (monthIsFinished) {
                    writeFileSync(pathM, JSON.stringify({ features }))
                    // The day files are now redundant with the month file
                    for (const writtenFile of writtenFiles) {
                        unlinkSync(writtenFile)
                    }
                }
            }
        }
    }

    /**
     * Downloads all changesets of a single day, following the pagination of the API.
     *
     * Every returned feature gets the contents of `properties.metadata` merged into `properties`
     * (after which `metadata` is removed) and its `id` copied into `properties.id`.
     *
     * @param year the year of the day to download
     * @param month the month of the day to download (1 = january)
     * @param day the day of the month to download
     * @param path only used in the log output; this method does not write the file itself
     * @returns the (non-null) features of that day
     * @throws if a response does not contain a list of features
     */
    public async DownloadStatsForDay(
        year: number,
        month: number,
        day: number,
        path: string
    ): Promise<any[]> {
        let page = 1
        let allFeatures: any[] = []
        // 'Date' normalises an overflowing day, so this is the next calendar day
        const endDay = new Date(year, month - 1 /* Zero-indexed: 0 = january*/, day + 1)
        const endDate = `${endDay.getFullYear()}-${Utils.TwoDigits(
            endDay.getMonth() + 1
        )}-${Utils.TwoDigits(endDay.getDate())}`
        let url: string = this.urlTemplate
            .replace(
                "{start_date}",
                year + "-" + Utils.TwoDigits(month) + "-" + Utils.TwoDigits(day)
            )
            .replace("{end_date}", endDate)
            .replace("{page}", "" + page)

        const headers = {
            "User-Agent":
                "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0",
            "Accept-Language": "en-US,en;q=0.5",
            Referer:
                "https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D",
            "Content-Type": "application/json",
            // NOTE(review): an API token is committed in source here; consider loading it from the environment instead
            Authorization: "Token 6e422e2afedb79ef66573982012000281f03dc91",
            DNT: "1",
            Connection: "keep-alive",
            TE: "Trailers",
            Pragma: "no-cache",
            "Cache-Control": "no-cache",
        }

        // Every response carries the url of the next page in 'next'; the loop ends when it is falsy
        while (url) {
            ScriptUtils.erasableLog(
                `Downloading stats for ${year}-${month}-${day}, page ${page} ${url}`
            )
            const result = await Utils.downloadJson(url, headers)
            page++

            // Validate before spreading: an error response has no 'features' to iterate
            if (!Array.isArray(result?.features)) {
                console.log("ERROR", result)
                throw new Error(
                    `OSMCha response for ${year}-${month}-${day} did not contain a list of features`
                )
            }
            allFeatures.push(...result.features)
            url = result.next
        }

        console.log(
            `Writing ${allFeatures.length} features to `,
            path,
            Utils.Times((_) => " ", 80)
        )
        allFeatures = Utils.NoNull(allFeatures)
        allFeatures.forEach((f) => {
            f.properties = { ...f.properties, ...f.properties.metadata }
            delete f.properties.metadata
            f.properties.id = f.id
        })
        return allFeatures
    }
}
/**
 * Shape of a single changeset feature, as `main` expects to find it in the stats files.
 *
 * Note that `StatsDownloader.DownloadStatsForDay` additionally copies the contents of
 * `properties.metadata` and the `id` into `properties` before a feature is saved.
 *
 * NOTE(review): members typed as `[]` are empty tuples in TypeScript (only an empty array is
 * assignable); presumably a list type was intended — confirm the element types before widening.
 */
interface ChangeSetData {
    id: number
    type: "Feature"
    geometry: {
        type: "Polygon"
        /** A list of rings; every ring is a list of coordinate pairs */
        coordinates: Array<Array<[number, number]>>
    }
    properties: {
        // Review state
        check_user: null
        reasons: []
        tags: []
        features: []

        // Author and editor information
        user: string
        uid: string
        editor: string
        comment: string
        comments_count: number
        source: string
        imagery_used: string
        date: string
        reviewed_features: []

        // Counters
        create: number
        modify: number
        delete: number
        area: number

        // Flags set during review
        is_suspect: boolean
        harmful: any
        checked: boolean
        check_date: any

        metadata: {
            host: string
            theme: string
            imagery: string
            language: string
        }
    }
}
2022-03-17 16:40:53 +01:00
/**
 * Entry point of the script: brings the cached statistics up to date and then writes
 * `file-overview.json`, a list of all json-files in the stats directory.
 *
 * Command line arguments (all optional, positional): `<startYear> <startMonth> <startDay>`;
 * a non-numeric or missing value keeps the default (2020, 5 and 1 respectively).
 * If `--no-graphs` is passed anywhere, the function returns before the overview file is written.
 */
async function main(): Promise<void> {
    if (!existsSync("graphs")) {
        mkdirSync("graphs")
    }

    const targetDir = "Docs/Tools/stats"
    // Without this directory every write fails, and the retry loop below would then never end
    mkdirSync(targetDir, { recursive: true })

    let year = 2020
    let month = 5
    let day = 1
    if (!isNaN(Number(process.argv[2]))) {
        year = Number(process.argv[2])
    }
    if (!isNaN(Number(process.argv[3]))) {
        month = Number(process.argv[3])
    }
    if (!isNaN(Number(process.argv[4]))) {
        day = Number(process.argv[4])
    }

    // Keep retrying until a run gets through; earlier attempts leave their day files on disk,
    // so every retry continues where the previous one failed
    while (true) {
        try {
            await new StatsDownloader(targetDir).DownloadStats(year, month, day)
            break
        } catch (e) {
            console.log(e)
        }
    }

    const allPaths = readdirSync(targetDir).filter(
        (p) => p.startsWith("stats.") && p.endsWith(".json")
    )
    // NOTE(review): the filtered features are not used any further within this function —
    // presumably a leftover of the graph generation; confirm whether this load can be dropped
    let allFeatures: ChangeSetData[] = [].concat(
        ...allPaths.map(
            (path) => JSON.parse(readFileSync(targetDir + "/" + path, "utf-8")).features
        )
    )
    allFeatures = allFeatures.filter(
        (f) =>
            f?.properties !== undefined &&
            (f.properties.editor === null ||
                f.properties.editor.toLowerCase().startsWith("mapcomplete"))
    )
    allFeatures = allFeatures.filter((f) => f.properties.metadata?.theme !== "EMPTY CS")

    if (process.argv.includes("--no-graphs")) {
        return
    }

    const allFiles = readdirSync(targetDir).filter((p) => p.endsWith(".json"))
    writeFileSync(targetDir + "/file-overview.json", JSON.stringify(allFiles))
}
2021-08-22 15:53:05 +02:00
2022-09-08 21:40:48 +02:00
// Run the script; a failure is logged and reflected in the exit code instead of being left unhandled
main()
    .then((_) => console.log("All done!"))
    .catch((e) => {
        console.error(e)
        process.exitCode = 1
    })