2023-02-01 02:13:04 +01:00
|
|
|
import { existsSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from "fs"
|
2023-01-09 20:38:05 +01:00
|
|
|
import ScriptUtils from "./ScriptUtils"
|
2023-09-02 00:55:07 +02:00
|
|
|
import { Utils } from "../src/Utils"
|
2023-01-09 20:38:05 +01:00
|
|
|
import Script from "./Script"
|
2023-09-02 00:55:07 +02:00
|
|
|
import { GeoOperations } from "../src/Logic/GeoOperations"
|
2023-01-09 20:38:05 +01:00
|
|
|
import { Feature, Polygon } from "geojson"
|
2023-09-02 00:55:07 +02:00
|
|
|
import { Tiles } from "../src/Models/TileRange"
|
2024-06-12 14:45:51 +02:00
|
|
|
import { BBox } from "../src/Logic/BBox"
|
2022-02-14 01:15:20 +01:00
|
|
|
|
2021-08-22 15:53:05 +02:00
|
|
|
/**
 * Downloads changeset metadata from the OsmCha API (filtered on the `#mapcomplete` comment)
 * and caches it in a target directory.
 *
 * Data is first stored as one file per day (`stats.<year>-<month>-<DD>.day.json`, a plain JSON array).
 * Once every day of a month has been collected, the days are merged into
 * `stats.<year>-<month>.json` (an object `{ features: [...] }`) and the per-day files are removed.
 */
class StatsDownloader {
    private readonly urlTemplate =
        "https://osmcha.org/api/v1/changesets/?date__gte={start_date}&date__lte={end_date}&page={page}&comment=%23mapcomplete&page_size=100"

    private readonly _targetDirectory: string

    /**
     * @param targetDirectory directory in which the `stats.*.json` files are read and written
     */
    constructor(targetDirectory = ".") {
        this._targetDirectory = targetDirectory
    }

    /**
     * Makes sure that statistics are on disk for every day from the given start date up to
     * (but not including) today. Months for which a merged file already exists are skipped;
     * days for which a per-day file already exists are loaded from disk instead of downloaded.
     *
     * @param startYear first year to handle
     * @param startMonth first month (1 = january) to handle within `startYear`
     * @param startDay first day to handle within the start month; all later months start at day 1
     * @throws if a day could not be downloaded twice in a row
     */
    public async DownloadStats(startYear = 2020, startMonth = 5, startDay = 1): Promise<void> {
        const today = new Date()
        const currentYear = today.getFullYear()
        const currentMonth = today.getMonth() + 1
        for (let year = startYear; year <= currentYear; year++) {
            for (let month = 1; month <= 12; month++) {
                if (year === startYear && month < startMonth) {
                    continue
                }

                if (year === currentYear && month > currentMonth) {
                    break
                }

                const pathM = `${this._targetDirectory}/stats.${year}-${month}.json`
                if (existsSync(pathM)) {
                    // This month was completed in an earlier run
                    continue
                }

                const isCurrentMonth = year === currentYear && month === currentMonth
                // The start day only applies to the very first month; every later month is handled completely
                const firstDay = year === startYear && month === startMonth ? startDay : 1
                // Day 0 of the next month is the last day of this month
                const daysInMonth = new Date(year, month, 0).getDate()

                const features: ChangeSetData[] = []
                const writtenFiles: string[] = []
                let monthIsFinished = true
                for (let day = firstDay; day <= daysInMonth; day++) {
                    if (isCurrentMonth && day >= today.getDate()) {
                        // Today (and anything later) is not complete yet: keep the per-day files and retry on a next run
                        monthIsFinished = false
                        break
                    }

                    const path = `${this._targetDirectory}/stats.${year}-${month}-${String(
                        day
                    ).padStart(2, "0")}.day.json`
                    writtenFiles.push(path)
                    if (existsSync(path)) {
                        let loadedFeatures = JSON.parse(readFileSync(path, { encoding: "utf-8" }))
                        // day-stats are generally a list already, but in some ad-hoc cases might be a geojson-collection too
                        loadedFeatures = loadedFeatures?.features ?? loadedFeatures
                        features.push(...loadedFeatures)
                        console.log(
                            "Loaded ",
                            path,
                            "from disk, which has",
                            features.length,
                            "features now"
                        )
                        continue
                    }

                    let dayFeatures: ChangeSetData[]
                    try {
                        dayFeatures = await this.DownloadStatsForDay(year, month, day)
                    } catch (e) {
                        console.error(e)
                        console.error(
                            "Could not download " + year + "-" + month + "-" + day + "... Trying again"
                        )
                        // One single retry; a second failure propagates to the caller
                        dayFeatures = await this.DownloadStatsForDay(year, month, day)
                    }
                    writeFileSync(path, JSON.stringify(dayFeatures))
                    features.push(...dayFeatures)
                }

                if (monthIsFinished) {
                    writeFileSync(pathM, JSON.stringify({ features }))
                    for (const writtenFile of writtenFiles) {
                        unlinkSync(writtenFile)
                    }
                }
            }
        }
    }

    /**
     * Downloads all changesets of a single day, following the `next` link of every response page.
     * The `metadata` object of every changeset is merged into its `properties` (and then removed),
     * and the changeset id is copied into `properties.id`.
     *
     * @param year full year, e.g. 2023
     * @param month month of the year, 1 = january
     * @param day day of the month
     * @returns the changesets of that day
     * @throws if a response page does not contain a `features` list
     */
    public async DownloadStatsForDay(
        year: number,
        month: number,
        day: number
    ): Promise<ChangeSetData[]> {
        let page = 1
        let allFeatures: ChangeSetData[] = []
        // `Date` normalises an overflowing day, so this is the next calendar day, even across month ends
        const endDay = new Date(year, month - 1 /* Zero-indexed: 0 = january*/, day + 1)
        const endDate = `${endDay.getFullYear()}-${Utils.TwoDigits(
            endDay.getMonth() + 1
        )}-${Utils.TwoDigits(endDay.getDate())}`
        let url: string = this.urlTemplate
            .replace(
                "{start_date}",
                year + "-" + Utils.TwoDigits(month) + "-" + Utils.TwoDigits(day)
            )
            .replace("{end_date}", endDate)
            .replace("{page}", "" + page)

        const headers = {
            "User-Agent":
                "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0",
            "Accept-Language": "en-US,en;q=0.5",
            Referer:
                "https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D",
            "Content-Type": "application/json",
            // NOTE(review): this API token is committed in the source; consider loading it from the environment or a config file
            Authorization: "Token 9cc11ad2868778272eadbb1a423ebb507184bc04",
            DNT: "1",
            Connection: "keep-alive",
            TE: "Trailers",
            Pragma: "no-cache",
            "Cache-Control": "no-cache",
        }

        while (url) {
            ScriptUtils.erasableLog(
                `Downloading stats for ${year}-${month}-${day}, page ${page} ${url}`
            )
            const result = await Utils.downloadJson<{ features: ChangeSetData[]; next: string }>(
                url,
                headers
            )
            // Validate before touching `features`: spreading `undefined` would throw an unhelpful TypeError
            if (result?.features === undefined) {
                console.log("ERROR", result)
                throw new Error(
                    `OsmCha response for ${year}-${month}-${day} (page ${page}) has no 'features'`
                )
            }
            page++
            allFeatures.push(...result.features)
            url = result.next
        }
        allFeatures = Utils.NoNull(allFeatures)
        allFeatures.forEach((f) => {
            // Flatten: lift the metadata fields to the top level of the properties
            f.properties = { ...f.properties, ...f.properties.metadata }
            delete f.properties.metadata
            f.properties["id"] = f.id
        })
        return allFeatures
    }
}
|
|
|
|
|
2023-01-09 20:38:05 +01:00
|
|
|
/**
 * Shape of a single changeset feature as it is handled by this script:
 * a GeoJSON polygon feature with a numeric id and a fixed set of properties.
 */
interface ChangeSetData extends Feature<Polygon> {
    id: number
    type: "Feature"
    geometry: {
        type: "Polygon"
        coordinates: Array<Array<[number, number]>>
    }
    properties: {
        // Who made the changeset, and with which tool
        user: string
        uid: string
        editor: string
        comment: string
        comments_count: number
        source: string
        imagery_used: string
        date: string

        // Numeric fields of the changeset
        create: number
        modify: number
        delete: number
        area: number

        // Review-related fields
        is_suspect: boolean
        harmful: any
        checked: boolean
        check_user: null
        check_date: any
        reasons: []
        tags: []
        features: []
        reviewed_features: []

        /**
         * Nested metadata block; `StatsDownloader.DownloadStatsForDay` merges these fields
         * into `properties` and then deletes this key.
         */
        metadata: {
            host: string
            theme: string
            imagery: string
            language: string
        }
    }
}
|
|
|
|
|
2023-01-09 20:38:05 +01:00
|
|
|
/**
 * Script which first brings the cached changeset statistics up to date and then
 * writes one GeoJSON file per tile containing the centerpoints of all changesets.
 */
class GenerateSeries extends Script {
    constructor() {
        super("Downloads metadata about changesets made by MapComplete from OsmCha")
    }

    /**
     * @param args `args[0]` is the data directory; defaults to `../../git/MapComplete-data`.
     *        Statistics go into `<dir>/changeset-metadata`, tiles into `<dir>/mapcomplete-changes/`.
     */
    async main(args: string[]): Promise<void> {
        const targetDir = args[0] ?? "../../git/MapComplete-data"

        await this.downloadStatistics(targetDir + "/changeset-metadata")
        this.generateCenterPoints(
            targetDir + "/changeset-metadata",
            targetDir + "/mapcomplete-changes/",
            {
                zoomlevel: 8,
            }
        )
    }

    /**
     * Downloads all missing statistics into `targetDir` and (re)writes `file-overview.json`,
     * which lists every `.json` file found in that directory.
     *
     * The start date defaults to 2020-05-01 and can be overridden with numeric values in
     * `process.argv[2]` (year), `process.argv[3]` (month) and `process.argv[4]` (day).
     * NOTE(review): `main` reads the data directory from `args[0]`; if `Script.run` passes
     * `process.argv.slice(2)`, that is the same slot as the year - confirm the intended CLI.
     */
    private async downloadStatistics(targetDir: string): Promise<void> {
        const numericArg = (index: number, fallback: number): number => {
            const parsed = Number(process.argv[index])
            return isNaN(parsed) ? fallback : parsed
        }
        const year = numericArg(2, 2020)
        const month = numericArg(3, 5)
        const day = numericArg(4, 1)

        // Retries until a run completes without error. Already downloaded days are cached
        // on disk by the downloader, so every attempt resumes where the previous one failed.
        // NOTE(review): there is no attempt limit or delay; a permanent failure loops forever.
        do {
            try {
                await new StatsDownloader(targetDir).DownloadStats(year, month, day)
                break
            } catch (e) {
                console.log(e)
            }
        } while (true)

        const allFiles = readdirSync(targetDir).filter((p) => p.endsWith(".json"))
        writeFileSync(targetDir + "/file-overview.json", JSON.stringify(allFiles))
    }

    /**
     * Reads every `stats.*.json` file from `sourceDir`, computes the centerpoint of every
     * usable changeset and writes them, grouped per tile, to
     * `<targetDir>/tile_<z>_<x>_<y>.geojson`.
     *
     * A changeset is used if it was made with a MapComplete editor (or has no editor set),
     * has a non-empty geometry and does not have the theme "EMPTY CS".
     *
     * @param sourceDir directory containing the statistics files
     * @param targetDir directory in which the tile files are written
     * @param options `zoomlevel`: zoom level passed to `GeoOperations.spreadIntoBboxes`
     */
    private generateCenterPoints(
        sourceDir: string,
        targetDir: string,
        options: {
            zoomlevel: number
        }
    ) {
        const allPaths = readdirSync(sourceDir).filter(
            (p) => p.startsWith("stats.") && p.endsWith(".json")
        )
        const loaded: ChangeSetData[] = allPaths.flatMap((path) => {
            const parsed = JSON.parse(readFileSync(sourceDir + "/" + path, "utf-8"))
            // Month files are `{ features: [...] }`, per-day files are a plain list: accept both
            const list = parsed?.features ?? parsed
            return Array.isArray(list) ? list : []
        })

        const allFeatures = loaded.filter((f) => {
            if (f?.properties === undefined) {
                return false
            }
            const editor = f.properties.editor
            if (editor !== null && !editor.toLowerCase().startsWith("mapcomplete")) {
                return false
            }
            // The downloader merges `metadata` into the properties, so the theme is normally
            // found at the top level; files which still have the nested form are supported too
            const theme = f.properties.metadata?.theme ?? f.properties["theme"]
            if (theme === "EMPTY CS") {
                return false
            }
            return f.geometry !== null && f.geometry.coordinates.length > 0
        })

        const centerpointsAll = allFeatures.map((f) => {
            const centerpoint = GeoOperations.centerpoint(f)
            const c = centerpoint.geometry.coordinates
            // OsmCha doesn't adhere to the Geojson standard and uses `lat` `lon` as coordinates instead of `lon`, `lat`
            centerpoint.geometry.coordinates = [c[1], c[0]]
            return centerpoint
        })
        const centerpoints = centerpointsAll.filter((p) => {
            const bbox = BBox.get(p)
            // Due to a bug somewhere, invalid bboxes with both latitudes at -90 might appear.
            // These crash 'spreadIntoBboxes'; as a workaround, we simply ignore them for now
            return !(bbox.minLat === -90 && bbox.maxLat === -90)
        })
        console.log("Found", centerpoints.length, " changesets in total")

        const perBbox = GeoOperations.spreadIntoBboxes(centerpoints, options.zoomlevel)

        for (const [tileNumber, features] of perBbox) {
            const [z, x, y] = Tiles.tile_from_index(tileNumber)
            const path = `${targetDir}/tile_${z}_${x}_${y}.geojson`
            features.forEach((f) => {
                delete f.bbox
            })
            writeFileSync(
                path,
                JSON.stringify(
                    {
                        type: "FeatureCollection",
                        features: features,
                    },
                    null,
                    " "
                )
            )

            ScriptUtils.erasableLog("Written ", path, "which has ", features.length, "features")
        }
    }
}
|
2021-08-22 15:53:05 +02:00
|
|
|
|
2023-01-09 20:38:05 +01:00
|
|
|
// Script entry point: build the script object and start it
const generateSeriesScript = new GenerateSeries()
generateSeriesScript.run()
|