New scripts to generate statistics

This commit is contained in:
pietervdvn 2021-08-22 15:53:05 +02:00
parent 7b595e65c6
commit a2f3b967b7
108 changed files with 336663 additions and 4921 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 244 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 228 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 177 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 220 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 170 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 167 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 168 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 170 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 171 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 374 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 187 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 431 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 362 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 645 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 274 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 642 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 162 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 169 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 219 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 162 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 220 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 169 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 257 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 296 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 165 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 179 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 372 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 332 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 409 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 284 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 442 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 404 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 221 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 238 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 262 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 271 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 263 KiB

62
Docs/Tools/GenPlot.py Normal file
View file

@ -0,0 +1,62 @@
from datetime import datetime
from matplotlib import pyplot
import json
import sys
def pyplot_init():
    """Throw away every open figure and start a fresh one.

    The new figure is 14x8 inches at 200 dpi, with vertical x tick labels
    and the grid switched on.
    """
    pyplot.close('all')
    pyplot.figure(dpi=200, figsize=(14, 8))
    pyplot.xticks(rotation='vertical')
    pyplot.grid()
def genKeys(data, type):
    """Return the ``"key"`` of every entry of *data*, in order.

    data: iterable of mappings that each have a ``"key"`` entry.
    type: how the keys must be interpreted. For ``"date"`` every key is
        parsed from an ISO timestamp (``YYYY-MM-DDTHH:MM:SS.fffZ``) into a
        naive ``datetime``; for any other value the keys are returned as-is.

    Raises ValueError if a ``"date"`` key does not match the timestamp format.
    """
    keys = [kv["key"] for kv in data]
    if type == "date":
        # %f accepts 1 to 6 fractional digits, so both ".000Z" and timestamps
        # with non-zero milliseconds are parsed
        keys = [datetime.strptime(key, "%Y-%m-%dT%H:%M:%S.%fZ") for key in keys]
    return keys
def createPie(options):
    """Draw a pie chart of ``options["plot"]["count"]`` on a fresh figure.

    options: a plot specification with
        - ``"name"``: name of the series (only used in the warning below)
        - ``"interpetKeysAs"``: passed to ``genKeys`` to build the labels
        - ``"plot"]["count"``: list of ``{"key": ..., "value": ...}`` entries

    The start angle is chosen so that the first slice is centred on the top
    of the pie. Nothing is drawn when there is no data or all values sum to
    zero, as the slice fractions would be undefined.
    """
    data = options["plot"]["count"]
    values = [kv["value"] for kv in data]
    total = sum(values)
    if total == 0:
        # Also covers an empty list: avoids IndexError / ZeroDivisionError below
        print("Not drawing pie '" + options["name"] + "': no data")
        return
    keys = genKeys(data, options["interpetKeysAs"])
    first_pct = values[0] / total
    pyplot_init()
    pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct / 2))
def createBar(options):
    """Add one bar series to the current figure and refresh the legend.

    options: a plot specification; ``options["plot"]["count"]`` holds the
        ``{"key": ..., "value": ...}`` entries, ``options["interpetKeysAs"]``
        tells ``genKeys`` how to turn the keys into x positions and
        ``options["name"]`` is used as legend label.
    """
    entries = options["plot"]["count"]
    positions = genKeys(entries, options["interpetKeysAs"])
    heights = [entry["value"] for entry in entries]
    pyplot.bar(positions, heights, label=options["name"])
    pyplot.legend()
# Script entry point.
#
# Usage: python GenPlot.py <title>
# Reads one JSON plot specification per line from stdin, draws each of them
# and, on the first empty line (or end of input), saves the figure as
# '<title>.png'.
pyplot_init()
title = sys.argv[1]
names = []
while True:
    line = sys.stdin.readline()
    if line == "" or line == "\n":
        # End of input or blank line: every plot specification has been received
        if len(names) > 1:
            pyplot.legend(loc="upper left", ncol=3)
        # The title is set right before saving: createPie starts a new figure,
        # which would discard a title set earlier. Note that pyplot.title is a
        # function; assigning to it (as was done before) never showed a title.
        pyplot.title(title)
        pyplot.savefig(title + ".png", dpi=400, facecolor='w', edgecolor='w',
                       bbox_inches='tight')
        break
    options = json.loads(line)
    plot_type = options["plot"]["type"]
    print("Creating " + plot_type + " '" + options["name"] + "'")
    names.append(options["name"])
    if plot_type == "pie":
        createPie(options)
    elif plot_type == "bar":
        createBar(options)
    else:
        # 'options' is a dict, so the type must be read by key
        print("Unknown type: " + plot_type)

View file

@ -0,0 +1,620 @@
import {existsSync, readdirSync, readFileSync, writeFileSync} from "fs";
import ScriptUtils from "../../scripts/ScriptUtils";
import {Utils} from "../../Utils";
import {exec} from "child_process"
import {GeoOperations} from "../../Logic/GeoOperations";
/**
 * Downloads the MapComplete changesets from the OSMCha API, one JSON file per month.
 */
class StatsDownloader {

    // First month for which statistics are downloaded: May 2020
    private readonly startYear = 2020
    private readonly startMonth = 5;
    private readonly urlTemplate = "https://osmcha.org/api/v1/changesets/?date__gte={start_date}&date__lte={end_date}&page={page}&editor=mapcomplete&page_size=100"
    // Directory in which the 'stats.<year>-<month>.json' files are written
    private readonly _targetDirectory: string;

    constructor(targetDirectory = ".") {
        this._targetDirectory = targetDirectory;
    }

    /**
     * Downloads every month from the start month up to and including the current month.
     * Months for which a file already exists are skipped, except for the current month:
     * that one is always downloaded again.
     */
    public async DownloadStats() {
        const now = new Date()
        const currentYear = now.getFullYear()
        const currentMonth = now.getMonth() + 1 // getMonth() is zero-based
        for (let year = this.startYear; year <= currentYear; year++) {
            for (let month = 1; month <= 12; month++) {
                if (year === this.startYear && month < this.startMonth) {
                    continue;
                }
                if (year === currentYear && month > currentMonth) {
                    continue
                }

                const path = `${this._targetDirectory}/stats.${year}-${month}.json`
                if (existsSync(path)) {
                    if (month == currentMonth && year == currentYear) {
                        console.log(`Force downloading ${year}-${month}`)
                    } else {
                        console.log(`Skipping ${year}-${month}: already exists`)
                        continue;
                    }
                }
                await this.DownloadStatsForMonth(year, month, path)
            }
        }
    }

    /**
     * Downloads all changesets of a single month (following the pagination links of the API)
     * and writes them to `path` as `{features: [...]}`.
     * If a response has no `features`, the response is logged and nothing is written.
     *
     * @param year the year to download
     * @param month the month to download (1 = January)
     * @param path the file to write
     */
    public async DownloadStatsForMonth(year: number, month: number, path: string) {
        let page = 1;
        const allFeatures = []
        // The end date is the first day of the next month
        let endDate = `${year}-${Utils.TwoDigits(month + 1)}-01`
        if (month == 12) {
            endDate = `${year + 1}-01-01`
        }
        let url = this.urlTemplate.replace("{start_date}", year + "-" + Utils.TwoDigits(month) + "-01")
            .replace("{end_date}", endDate)
            .replace("{page}", "" + page)

        // NOTE(review): the Authorization header below is a hard-coded API token which is committed to
        // the repository. It should be revoked and read from the environment or a config file instead.
        const headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D',
            'Content-Type': 'application/json',
            'Authorization': 'Token 6e422e2afedb79ef66573982012000281f03dc91',
            'DNT': '1',
            'Connection': 'keep-alive',
            'TE': 'Trailers',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache'
        }

        while (url) {
            ScriptUtils.erasableLog(`Downloading stats for ${year}-${month}, page ${page} ${url}`)
            const result = await ScriptUtils.DownloadJSON(url, {
                headers: headers
            })
            page++;
            // This check must come before the features are used:
            // spreading 'undefined' throws, which made the error report below unreachable
            if (result.features === undefined) {
                console.log("ERROR", result)
                return
            }
            allFeatures.push(...result.features)
            // The API response contains the link to the next page; it is falsy on the last page
            url = result.next
        }
        console.log(`Writing ${allFeatures.length} features to `, path, "                   ")
        writeFileSync(path, JSON.stringify({
            features: allFeatures
        }, undefined, 2))
    }

}
/**
 * A single changeset, in the shape in which it is stored in the 'stats.*.json' files.
 */
interface ChangeSetData {
    id: number,
    type: "Feature",
    geometry: {
        type: "Polygon",
        coordinates: [number, number][][]
    },
    properties: {
        check_user: null,
        reasons: [],
        tags: [],
        features: [],
        user: string,
        uid: string,
        editor: string,
        comment: string,
        comments_count: number,
        source: string,
        imagery_used: string,
        // Timestamp as string; its first ten characters are used as the day
        date: string,
        reviewed_features: [],
        // Counters of the changes within the changeset; if all three are zero, the changeset is marked as 'EMPTY CS'
        create: number,
        modify: number,
        delete: number,
        area: number,
        is_suspect: boolean,
        harmful: any,
        checked: boolean,
        check_date: any,
        metadata: {
            host: string,
            // Rewritten by ChangesetDataTools.cleanChangesetData
            theme: string,
            imagery: string,
            language: string
        }
    }
}
/**
 * Maps a (raw) theme name onto the theme name under which it is counted.
 *
 * NOTE(review): ChangesetDataTools.cleanChangesetData lowercases the theme before it is looked up in this table,
 * so the keys below which contain an uppercase character can never match.
 */
const theme_remappings = {
    "metamap": "maps",
    "groen": "buurtnatuur",
    "updaten van metadata met mapcomplete": "buurtnatuur",
    "Toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur",
    "wiki:mapcomplete/fritures": "fritures",
    "wiki:MapComplete/Fritures": "fritures",
    "lits": "lit",
    "pomp": "cyclofix",
    "wiki:user:joost_schouppe/campersite": "campersite",
    "wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki-user-joost_schouppe-campersite": "campersite",
    "wiki-User-joost_schouppe-campersite": "campersite",
    "wiki-User-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki:User:joost_schouppe/campersite": "campersite",
    "arbres": "arbres_llefia",
    "aed_brugge": "aed",
    "https://llefia.org/arbres/mapcomplete.json": "arbres_llefia",
    "https://llefia.org/arbres/mapcomplete1.json": "arbres_llefia",
    "toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur",
    "testing mapcomplete 0.0.0": "buurtnatuur",
    "https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes"
}
class ChangesetDataTools {

    /**
     * Normalizes the theme of the given changeset, in place:
     * - if the metadata has no theme, the part of the comment after the last '#' is used
     *   (or the entire comment if it contains no '#')
     * - the theme is lowercased and passed through `theme_remappings`
     * - themes loaded from raw.githubusercontent.com get the shorter 'gh://' prefix
     * - changesets which create, modify and delete nothing get the theme 'EMPTY CS'
     *
     * @param cs the changeset to clean; it is modified
     * @returns the same (modified) changeset
     */
    public static cleanChangesetData(cs: ChangeSetData): ChangeSetData {
        const props = cs.properties
        const meta = props.metadata
        if (meta.theme === undefined) {
            const hashIndex = props.comment.lastIndexOf("#")
            meta.theme = props.comment.substr(hashIndex + 1)
        }

        let theme = meta.theme.toLowerCase()
        theme = theme_remappings[theme] ?? theme

        const githubPrefix = "https://raw.githubusercontent.com/"
        if (theme.startsWith(githubPrefix)) {
            theme = "gh://" + theme.substr(githubPrefix.length)
        }

        if (props.modify + props.delete + props.create == 0) {
            theme = "EMPTY CS"
        }
        meta.theme = theme
        return cs
    }

}
/**
 * The description of a single data series. It is serialized as JSON and sent (one per line) to GenPlot.py.
 */
interface PlotSpec {
    // Name of the series; GenPlot.py uses it as legend label for bars
    name: string,
    // How GenPlot.py should interpret the keys; only "date" gets a special treatment there.
    // The spelling of this property is part of the JSON format which GenPlot.py reads, do not correct it here alone.
    interpetKeysAs: "date" | "number" | "string" | string
    plot: {
        type: "pie" | "bar"
        count: { key: string, value: number }[]
    } | {
        // NOTE(review): GenPlot.py only draws "pie" and "bar"; other types are reported as unknown there
        type: "stacked-bar"
        count: {
            label: string,
            values: { key: string | Date, value: number }[]
        }[]
    },

    // Creates a graph which contains only this series
    render()
}
/**
 * Starts GenPlot.py to draw all the given series into a single image, `graphs/<title>.png`.
 * Every series is written as one line of JSON to the standard input of the script,
 * followed by an empty line which tells the script that everything has been sent.
 *
 * The output of the script is logged when it exits; this function does not wait for that.
 *
 * NOTE(review): the title is pasted into a shell command line between double quotes.
 * This is only safe as long as titles come from this script itself.
 *
 * @param title the title of the graph, also used as filename
 * @param options the series to draw
 */
function createGraph(
    title: string,
    ...options: PlotSpec[]) {
    // Not named 'process', as that would shadow the global 'process' of Node
    const child = exec("python GenPlot.py \"graphs/" + title + "\"", ((error, stdout, stderr) => {
        console.log("Python: ", stdout)
        if (error !== null) {
            console.error(error)
        }
        if (stderr !== "") {
            console.error(stderr)
        }
    }))

    // 'write' is the public stream API; '_write' is an implementation detail of the stream
    for (const option of options) {
        child.stdin.write(JSON.stringify(option) + "\n", "utf-8")
    }
    child.stdin.write("\n", "utf-8")
    // Close the pipe, so that the script sees the end of the input once everything is flushed
    child.stdin.end()
}
/**
 * Counts how often every key occurs.
 * Most methods return a new histogram and leave this one untouched.
 */
class Histogram<K> {

    // The count per key
    public counts: Map<K, number> = new Map<K, number>()
    // Keys which are placed after all the other keys by 'asPie' and 'asBar'.
    // NOTE(review): these are looked up with 'indexOf' (strict equality), thus this has no effect for Date-keys
    private sortAtEnd: K[] = []

    /**
     * @param keys if given, every key is counted once per occurrence
     */
    constructor(keys?: K[]) {
        keys?.forEach(key => this.bump(key))
    }

    /**
     * The sum of all the counts
     */
    total(): number {
        let total = 0
        this.counts.forEach(count => total += count)
        return total
    }

    /**
     * Increases the count of the given key, which is added if needed
     */
    public bump(key: K, increase = 1) {
        this.counts.set(key, (this.counts.get(key) ?? 0) + increase)
    }

    /**
     * Adds all the values of the given histogram to this histogram
     * @param hist
     */
    public bumpHist(hist: Histogram<K>) {
        hist.counts.forEach((value, key) => {
            this.bump(key, value)
        })
    }

    /**
     * Creates a new histogram in which small entries are lumped together into the 'other' category.
     *
     * @param otherName the key of the 'other' category. It is sorted last when plotted
     * @param cutoff if a number: all entries with a count of at most 'cutoff' are lumped together.
     *               If a function: all entries for which it returns true are lumped together
     */
    public createOthersCategory(otherName: K, cutoff: number | ((key: K, value: number) => boolean) = 15): Histogram<K> {
        const hist = new Histogram<K>()
        hist.sortAtEnd.push(otherName)

        let isOther: (key: K, value: number) => boolean
        if (typeof cutoff === "number") {
            const limit = cutoff
            isOther = (_, value) => value <= limit
        } else {
            isOther = cutoff
        }

        this.counts.forEach((value, key) => {
            hist.bump(isOther(key, value) ? otherName : key, value)
        })
        return hist;
    }

    /**
     * Creates a new histogram where every key is renamed to 'key (count)'
     */
    public addCountToName(): Histogram<string> {
        const hist = new Histogram<string>()
        hist.sortAtEnd = this.sortAtEnd.map(name => `${name} (${this.counts.get(name)})`)

        this.counts.forEach((value, key) => {
            hist.bump(`${key} (${value})`, value)
        })
        return hist;
    }

    /**
     * Creates an independent copy of this histogram
     */
    public Clone(): Histogram<K> {
        const hist = new Histogram<K>()
        hist.bumpHist(this)
        hist.sortAtEnd = [...this.sortAtEnd];
        return hist;
    }

    /**
     * Creates a new histogram where every key is parsed as date.
     *
     * @param addMissingDays if set, an entry with count zero is added for every day between the earliest and
     *                       the latest date for which no entry exists yet. Days are stepped in UTC, so this
     *                       matches keys of the form 'YYYY-MM-DD', which are parsed as midnight UTC
     */
    public keyToDate(addMissingDays: boolean = false): Histogram<Date> {
        const hist = new Histogram<Date>()
        hist.sortAtEnd = this.sortAtEnd.map(name => new Date("" + name))

        let earliest: Date = undefined;
        let latest: Date = undefined;
        this.counts.forEach((value, key) => {
            const d = new Date("" + key);
            if (earliest === undefined || d.getTime() < earliest.getTime()) {
                earliest = d
            }
            if (latest === undefined || d.getTime() > latest.getTime()) {
                latest = d
            }
            hist.bump(d, value)
        })

        if (addMissingDays && earliest !== undefined) {
            // Map-keys are compared by identity: every day which is already present is remembered by timestamp
            const knownDays = new Set<number>()
            hist.counts.forEach((_, date) => knownDays.add(date.getTime()))

            // 'earliest' is a key of the histogram and must not be modified, hence a separate cursor is used
            const cursor = new Date(earliest.getTime())
            cursor.setUTCDate(cursor.getUTCDate() + 1)
            while (cursor.getTime() < latest.getTime()) {
                if (!knownDays.has(cursor.getTime())) {
                    // Every added day needs its own Date-object
                    hist.bump(new Date(cursor.getTime()), 0)
                }
                cursor.setUTCDate(cursor.getUTCDate() + 1)
            }
        }
        return hist
    }

    /**
     * Given a histogram:
     * 'a': 3
     * 'b': 5
     * 'c': 3
     * 'd': 1
     *
     * This will create a new histogram, which counts how much every count occurs, thus:
     * 5: 1  // as only 'b' had 5 counts
     * 3: 2  // as both 'a' and 'c' had 3 counts
     * 1: 1  // as only 'd' has 1 count
     */
    public binPerCount(): Histogram<number> {
        const hist = new Histogram<number>()
        this.counts.forEach((value) => {
            hist.bump(value)
        })
        return hist;
    }

    /**
     * Creates a new histogram where every key is converted into a string
     */
    public stringifyName(): Histogram<string> {
        const hist = new Histogram<string>()
        this.counts.forEach((value, key) => {
            hist.bump("" + key, value)
        })
        return hist;
    }

    /**
     * Creates the specification of a pie chart of this histogram.
     * The entries are sorted with 'options.compare' if given, by descending count otherwise.
     * Dates are serialized as ISO-string, all other keys are converted into a string.
     *
     * @param options.name the name of the series
     * @param options.compare compares two keys to determine the order of the entries
     */
    public asPie(options: {
        name: string
        compare?: (a: K, b: K) => number
    }): PlotSpec {
        const entriesArray = Array.from(this.counts.entries())

        // The type of the keys is derived from the first key; an empty histogram has no first key
        let type: string = "string"
        if (entriesArray.length > 0) {
            const firstKey = entriesArray[0][0]
            type = firstKey instanceof Date ? "date" : typeof firstKey
        }

        const entries = entriesArray.map(kv => {
            return ({key: kv[0], value: kv[1]});
        })

        if (options.compare) {
            entries.sort((a, b) => options.compare(a.key, b.key))
        } else {
            entries.sort((a, b) => b.value - a.value)
        }
        // Moves the 'sortAtEnd'-keys to the back: all other keys have index -1 and keep their relative order
        entries.sort((a, b) => this.sortAtEnd.indexOf(a.key) - this.sortAtEnd.indexOf(b.key))

        const graph: PlotSpec = {
            name: options.name,
            interpetKeysAs: type,
            plot: {
                type: "pie",
                count: entries.map(kv => {
                    if (kv.key instanceof Date) {
                        return ({key: kv.key.toISOString(), value: kv.value})
                    }
                    return ({key: kv.key + "", value: kv.value});
                })
            },
            render: undefined
        }
        graph.render = () => createGraph(graph.name, graph)
        return graph;
    }

    /**
     * Creates the specification of a bar chart of this histogram; see 'asPie' for the options
     */
    public asBar(options: {
        name: string
        compare?: (a: K, b: K) => number
    }): PlotSpec {
        const spec = this.asPie(options)
        spec.plot.type = "bar"
        return spec;
    }

}
/**
 * Collects, for every key, the list of all values which were added for that key.
 */
class Group<K, V> {

    public groups: Map<K, V[]> = new Map<K, V[]>()

    /**
     * @param features if given, `fvalue(feature)` is added under `fkey(feature)` for every feature
     */
    constructor(features?: any[], fkey?: (feature: any) => K, fvalue?: (feature: any) => V) {
        features?.forEach(feature => this.bump(fkey(feature), fvalue(feature)))
    }

    /**
     * Draws a graph with one bar series per value of `extractV`, showing the number of features per day.
     * The series are stacked with `stackHists`.
     *
     * @param name the title of the graph
     * @param features the changesets; the first ten characters of `properties.date` are used as day
     * @param extractV determines to which series a feature belongs
     * @param minNeededTotal only series with strictly more features than this are drawn
     */
    public static createStackedBarChartPerDay(name: string, features: any, extractV: (feature: any) => string, minNeededTotal = 1): void {
        const perDay = new Group<string, string>(
            features,
            f => f.properties.date.substr(0, 10),
            extractV
        )

        const labelled: [string, Histogram<string>][] = perDay.asGroupedHists()
            .filter(([_, hist]) => hist.total() > minNeededTotal)
            .map(([label, hist]) => [`${label} (${hist.total()})`, hist])

        const series = Array.from(stackHists<string, string>(labelled))
            .map(([label, hist]) => hist.keyToDate(true).asBar({name: label}))

        createGraph(name, ...series)
    }

    /**
     * Adds the value to the list of the key
     */
    public bump(key: K, value: V) {
        let values = this.groups.get(key)
        if (values === undefined) {
            values = []
            this.groups.set(key, values)
        }
        values.push(value)
    }

    /**
     * Counts the number of values per key
     * @param dedup if set, identical values are only counted once per key
     */
    public asHist(dedup = false): Histogram<K> {
        const hist = new Histogram<K>()
        this.groups.forEach((values, key) => {
            hist.bump(key, dedup ? new Set(values).size : values.length)
        })
        return hist
    }

    /**
     * Creates, for every value, a histogram which counts how often that value occurs per key.
     * The values are returned in order of first occurrence.
     */
    asGroupedHists(): [V, Histogram<K>][] {
        const histPerValue = new Map<V, Histogram<K>>()
        this.groups.forEach((values, key) => {
            for (const value of values) {
                let hist = histPerValue.get(value)
                if (hist === undefined) {
                    hist = new Histogram<K>()
                    histPerValue.set(value, hist)
                }
                hist.bump(key)
            }
        })
        return Array.from(histPerValue.entries())
    }
}
/**
 * Stacks the given histograms: every histogram in the result is the sum of the matching input histogram
 * and all the input histograms before it. The input histograms are not modified.
 *
 * The result is in reverse order, so the histogram with the highest totals comes first.
 */
function stackHists<K, V>(hists: [V, Histogram<K>][]): [V, Histogram<K>][] {
    const runningTotals = new Histogram<K>()
    const stacked: [V, Histogram<K>][] = []
    for (const [label, hist] of hists) {
        const layer = hist.Clone()
        layer.bumpHist(runningTotals)
        runningTotals.bumpHist(hist)
        stacked.push([label, layer])
    }
    return stacked.reverse()
}
/**
 * Creates all the graphs for the given changesets.
 *
 * @param allFeatures the changesets to create the graphs for
 * @param appliedFilterDescription describes which changesets were selected, e.g. " in 2020".
 *        It is appended to the name of every graph, which keeps the filenames of different selections apart
 */
function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: string) {
    if (allFeatures.length === 0) {
        // The plots need at least one entry
        console.log("No changesets" + appliedFilterDescription + ", not creating graphs")
        return
    }

    const hist = new Histogram<string>(allFeatures.map(f => f.properties.metadata.theme))
    hist
        .addCountToName()
        .createOthersCategory("other", 40)
        .asPie({
            name: "Changesets per theme" + appliedFilterDescription
        }).render()
    hist
        .createOthersCategory("other", 20)
        .addCountToName()
        .asBar({name: "Changesets per theme (bar)" + appliedFilterDescription}).render()

    new Histogram<string>(allFeatures.map(f => f.properties.user))
        .binPerCount()
        .stringifyName()
        .createOthersCategory("25 or more", (key, _) => Number(key) >= 25).asBar(
        {
            compare: (a, b) => Number(a) - Number(b),
            name: "Contributors per changeset count" + appliedFilterDescription
        }).render()

    // The filter description is needed in the name: without it, every selection writes the same file
    new Histogram<string>(allFeatures.map(f => f.properties.metadata.host))
        .asPie({
            name: "Changesets per host" + appliedFilterDescription
        }).render()

    // This name must differ from the name of the pie chart above, as the name is used as filename
    Group.createStackedBarChartPerDay(
        "Changesets per theme per day" + appliedFilterDescription,
        allFeatures,
        f => f.properties.metadata.theme,
        25
    )
    Group.createStackedBarChartPerDay(
        "Changesets per version number" + appliedFilterDescription,
        allFeatures,
        f => f.properties.editor.substr("MapComplete ".length, 6).replace(/[a-zA-Z-/]/g, ''),
        1
    )

    {
        // Contributors (unique + unique new) per day
        const contributorCountPerDay = new Group<string, string>()
        const newContributorsPerDay = new Group<string, string>()
        const seenContributors = new Set<string>()

        // A contributor is 'new' on the day of their earliest changeset, thus the changesets must be visited
        // oldest first. The dates are strings which start with 'YYYY-MM-DD', so they are compared as text
        const chronological = [...allFeatures].sort((a, b) => {
            if (a.properties.date < b.properties.date) {
                return -1
            }
            return a.properties.date > b.properties.date ? 1 : 0
        })

        chronological.forEach(f => {
            const user = f.properties.user
            const day = f.properties.date.substr(0, 10)
            contributorCountPerDay.bump(day, user)
            if (!seenContributors.has(user)) {
                seenContributors.add(user)
                newContributorsPerDay.bump(day, user)
            }
        })
        const total = seenContributors.size
        createGraph(
            `Contributors per day${appliedFilterDescription} (${total} total contributors)`,
            contributorCountPerDay
                .asHist(true)
                .keyToDate(true)
                .asBar({
                    name: "Unique contributors per day"
                }),
            newContributorsPerDay
                .asHist(true)
                .keyToDate(true)
                .asBar({
                    name: "New, unique contributors per day"
                }),
        )
    }
}
// Uncomment to refresh the files in 'stats' before the graphs are created
// new StatsDownloader("stats").DownloadStats()

// Load all the changesets from 'stats/stats.*.json' and normalize their theme
const allPaths = readdirSync("stats")
    .filter(p => p.startsWith("stats.") && p.endsWith(".json"));
let allFeatures: ChangeSetData[] = [].concat(...allPaths
    .map(path => JSON.parse(readFileSync("stats/" + path, "utf-8")).features
        .map(cs => ChangesetDataTools.cleanChangesetData(cs))));

// Changesets without any change get their own graph and are kept out of all the other graphs
const emptyCS = allFeatures.filter(f => f.properties.metadata.theme === "EMPTY CS")
allFeatures = allFeatures.filter(f => f.properties.metadata.theme !== "EMPTY CS")

new Histogram(emptyCS.map(f => f.properties.date)).keyToDate().asBar({
    name: "Empty changesets by date"
}).render()

writeFileSync("centerpoints.geojson", JSON.stringify({
    type: "FeatureCollection",
    features: allFeatures
        .map(f => {
            try {
                return GeoOperations.centerpoint(f.geometry);
            } catch (e) {
                console.error("Could not create center point: ", e, f)
                return undefined
            }
        })
        // A failed centerpoint would be serialized as 'null', which is not a valid feature
        .filter(centerpoint => centerpoint !== undefined)
}))

createGraphs(allFeatures, "")
createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2020")), " in 2020")
createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2021")), " in 2021")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 350 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 628 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 678 KiB

File diff suppressed because one or more lines are too long

View file

@ -1,5 +0,0 @@
#! /bin/bash
# Regenerates the statistics: fetches the raw data, converts it into csv
# and draws the graphs. Must be run from the directory containing the scripts.

# Every step consumes the output of the previous one: stop at the first
# failure instead of drawing graphs from missing or stale data
set -euo pipefail

./fetchStats.sh
./csvPerChange.sh
python3 csvGrapher.py

View file

@ -1,479 +0,0 @@
import csv
from datetime import datetime
from matplotlib import pyplot
import re
useLegend = True
def counts(lst):
    """Tally how often every element of *lst* occurs.

    Returns a dict mapping each distinct element onto its number of
    occurrences, in order of first appearance.
    """
    tally = {}
    for item in lst:
        tally[item] = tally.get(item, 0) + 1
    return tally
class Hist:
    """Groups values per key: every key maps onto the list of values added for it.

    Keys are kept in the order in which they were first added.
    """

    def __init__(self, firstcolumn):
        """firstcolumn: the name of the key column, used (quoted) in the csv header."""
        # Must not be reset afterwards: csv() needs it for the header
        self.key = "\"" + firstcolumn + "\""
        self.dictionary = {}

    def add(self, key, value):
        """Append *value* to the values of *key*."""
        self.dictionary.setdefault(key, []).append(value)

    def values(self):
        """Return all the distinct values over all keys, in arbitrary order."""
        distinct = set()
        for vals in self.dictionary.values():
            distinct.update(vals)
        return list(distinct)

    def keys(self):
        """Return a view on the keys, in order of first addition."""
        return self.dictionary.keys()

    def get(self, key):
        """Return the list of values of *key*, or None if the key is unknown."""
        return self.dictionary.get(key)

    # Returns values.map(f).
    def map(self, f):
        """Return ``f(values)`` for the values of every key, in key order."""
        return [f(vals) for vals in self.dictionary.values()]

    def mapcumul(self, f, add, zero):
        """Return the running total of ``f(values)`` over the keys, in key order.

        f: converts the list of values of a key
        add: combines the running total so far with the next converted value
        zero: the running total to start from
        """
        vals = []
        running_value = zero
        for key_values in self.dictionary.values():
            running_value = add(running_value, f(key_values))
            vals.append(running_value)
        return vals

    # Returns [(key, flatten(values))] To be used with e.g. pyplot.plot
    def flatten(self, flatten):
        """Return ``(key, flatten(values))`` for every key, in key order."""
        return [(key, flatten(vals)) for key, vals in self.dictionary.items()]

    def csv(self):
        """Return a csv table which counts, per key, how often every value occurs.

        The first line is the header: the key column followed by all distinct
        values (sorted, so the column order is stable). Every other line is a
        key followed by the count of each value. Keys and values must be strings.
        """
        # The header is computed once: the columns of the rows must match it
        header = sorted(self.values())
        lines = [",".join([self.key] + header)]
        for k, vals in self.dictionary.items():
            lines.append(",".join([k] + [str(vals.count(head)) for head in header]))
        # Every line, including the header, ends with a newline
        return "".join(line + "\n" for line in lines)

    def __str__(self):
        return str(self.dictionary)
def build_hist(stats, keyIndex, valueIndex):
    """Group one column of *stats* by another column.

    stats: iterable of rows (indexable sequences)
    keyIndex: index of the column to group by
    valueIndex: index of the column whose values are collected

    Returns a Hist mapping every key onto the list of its values.
    """
    grouped = Hist("date")
    for row in stats:
        grouped.add(row[keyIndex], row[valueIndex])
    return grouped
def as_date(str):
    """Parse a ``YYYY-MM-DD`` string into a (naive) datetime at midnight."""
    day_format = "%Y-%m-%d"
    return datetime.strptime(str, day_format)
def cumulative_users(stats):
    """Compute the contributor counts per day.

    stats: list of rows with the day (``YYYY-MM-DD``) in column 0 and the
        contributor in column 1.
        NOTE(review): the days are handled in order of first appearance, so
        the rows are presumably expected in chronological order - confirm.

    Returns four lists with one entry per day:
        - the day, as datetime
        - the number of distinct contributors up to and including that day
        - the number of distinct contributors on that day
        - the number of contributors which were not seen on an earlier day
    """
    users_hist = build_hist(stats, 0, 1)
    all_users_per_day = users_hist.mapcumul(
        lambda users: set(users),
        lambda a, b: a.union(b),
        set()
    )
    cumul_uniq = [len(users) for users in all_users_per_day]
    unique_per_day = users_hist.map(lambda users: len(set(users)))
    # The new contributors of a day are the growth of the cumulative count.
    # The first day is compared against zero: everyone is new on that day.
    new_users = [today - before for before, today in zip([0] + cumul_uniq, cumul_uniq)]
    dates = [as_date(day) for day in users_hist.keys()]
    return dates, cumul_uniq, unique_per_day, new_users
def pyplot_init():
    """Throw away every open figure and start a fresh one.

    The new figure is 14x8 inches at 200 dpi, with vertical x tick labels
    and the grid switched on.
    """
    pyplot.close('all')
    pyplot.figure(dpi=200, figsize=(14, 8))
    pyplot.xticks(rotation='vertical')
    pyplot.grid()
def create_usercount_graphs(stats, extra_text=""):
    """Draw the contributor graphs and save them as png.

    Two images are written: 'Contributors<extra_text>.png' (unique and first
    time contributors per day) and 'CumulativeContributors<extra_text>.png'.

    stats: the rows to plot, see cumulative_users
    extra_text: appended to the titles and the filenames
    """
    print("Creating usercount graphs " + extra_text)
    dates, cumul_uniq, unique_per_day, new_users = cumulative_users(stats)
    if not cumul_uniq:
        # Nothing to plot, e.g. a year without changesets; there is no total either
        print("No entries found for usercount graphs (extra: " + str(extra_text) + ")")
        return
    total = cumul_uniq[-1]

    pyplot_init()
    pyplot.bar(dates, unique_per_day, label='Unique contributors')
    pyplot.bar(dates, new_users, label='First time contributor via MapComplete')
    if useLegend:
        pyplot.legend()
    pyplot.title("Unique contributors" + extra_text + ' with MapComplete (' + str(total) + ' contributors)')
    pyplot.ylabel("Number of unique contributors")
    pyplot.xlabel("Date")
    pyplot.savefig("Contributors" + extra_text + ".png", dpi=400, facecolor='w', edgecolor='w')

    pyplot_init()
    pyplot.plot(dates, cumul_uniq, label='Cumulative unique contributors')
    if useLegend:
        pyplot.legend()
    pyplot.title("Cumulative unique contributors" + extra_text + " with MapComplete - " + str(total) + " contributors")
    pyplot.ylabel("Number of unique contributors")
    pyplot.xlabel("Date")
    pyplot.savefig("CumulativeContributors" + extra_text + ".png", dpi=400, facecolor='w', edgecolor='w')
def create_contributors_per_total_cs(contents, extra_text="", cutoff=25, per_day=False):
    """Draw how many contributors made how many changesets and save it as png.

    contents: rows with the day in column 0 and the contributor in column 1
    extra_text: appended to the title and the filename
    cutoff: all contributors with this many changesets or more share one bar
    per_day: count the distinct days on which a contributor was active
        instead of the number of changesets
    """
    hist = Hist("contributor")
    for cs in contents:
        hist.add(cs[1], cs[0])
    count_per_contributor = hist.map(lambda dates: len(set(dates))) if per_day else hist.map(len)

    per_count = Hist("per cs count")
    for cs_count in count_per_contributor:
        per_count.add(min(cs_count, cutoff), 1)

    to_plot = per_count.flatten(len)
    if not to_plot:
        print("No entries found for contributors per total (extra: " + str(extra_text) + ")")
        return
    to_plot.sort(key=lambda a: a[0])
    # Counts are capped at the cutoff, so the 'or more' bar (if any) is the
    # last one. If nobody reached the cutoff, the last bar is a plain count
    # which must keep its label.
    if to_plot[-1][0] == cutoff:
        to_plot[-1] = (str(cutoff) + " or more", to_plot[-1][1])

    pyplot_init()
    pyplot.bar([str(label) for label, _ in to_plot], [amount for _, amount in to_plot])
    pyplot.title("Contributors per total number of changesets" + extra_text)
    pyplot.ylabel("Number of contributors")
    pyplot.xlabel("Mapping days with MapComplete" if per_day else "Number of changesets with MapComplete")
    pyplot.savefig(
        "Contributors per total number of " + ("mapping days" if per_day else "changesets") + extra_text + ".png",
        dpi=400)
def create_theme_breakdown(stats, fileExtra="", cutoff=15):
    """Draw a pie chart of the number of changes per theme and save it as png.

    stats: rows with the theme in column 3. Themes are lowercased and
        renamed with ``theme_remappings`` before they are counted.
    fileExtra: appended to the title and the filename
    cutoff: themes with fewer changes are merged into one 'other' slice

    Returns the list of ``(theme, count)``, most used theme first, or None
    if there is nothing to plot.
    """
    print("Creating theme breakdown " + fileExtra)
    themeCounts = {}
    for row in stats:
        name = row[3].lower()
        name = theme_remappings.get(name, name)
        themeCounts[name] = themeCounts.get(name, 0) + 1

    if not themeCounts:
        print("No entries found for theme breakdown (extra: " + str(fileExtra) + ")")
        return
    themes = sorted(themeCounts.items(), key=lambda kv: kv[1], reverse=True)

    shown = [(name, count) for name, count in themes if count >= cutoff]
    other_count = sum(count for _, count in themes if count < cutoff)
    keys = [name + " (" + str(count) + ")" for name, count in shown]
    values = [count for _, count in shown]
    total = sum(count for _, count in themes)
    # Share of the most used theme, used to centre that slice on the top
    first_pct = themes[0][1] / total

    if other_count > 0:
        keys.append("other")
        values.append(other_count)

    pyplot_init()
    pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct / 2))
    pyplot.title("MapComplete changes per theme" + fileExtra + " - " + str(total) + " total changes")
    pyplot.savefig("Theme distribution" + fileExtra + ".png", dpi=400, facecolor='w', edgecolor='w',
                   bbox_inches='tight')
    return themes
def summed_changes_per(contents, extraText, sum_column=5):
    """Draw the summed values of column 5 ('New') and column 6 ('Changed') per day.

    contents: rows with the day (``YYYY-MM-DD``) in column 0
    extraText: appended to the title, which is also used as filename
    sum_column: NOTE(review): not used, the columns 5 and 6 are hard-coded

    Nothing is drawn if there are no rows.
    """

    def totals_per_day(column):
        # The days (as datetime) and the sum of the column per day
        flattened = build_hist(contents, 0, column).flatten(sum)
        days = [as_date(day) for day, _ in flattened]
        sums = [day_sum for _, day_sum in flattened]
        return days, sums

    keysNew, valuesNew = totals_per_day(5)
    keysChanged, valuesChanged = totals_per_day(6)

    if not keysChanged and not keysNew:
        return

    pyplot_init()
    text = "New and changed nodes per day " + extraText
    pyplot.title(text)
    if keysChanged:
        pyplot.bar(keysChanged, valuesChanged, label="Changed")
    if keysNew:
        pyplot.bar(keysNew, valuesNew, label="New")
    if useLegend:
        pyplot.legend()
    pyplot.savefig(text)
def cumulative_changes_per(contents, index, subject, filenameextra="", cutoff=5, cumulative=True, sort=True):
    """Draw the number of changesets per day, split by the values of one column, and save it as png.

    contents: rows with the day (``YYYY-MM-DD``) in column 0
    index: the column to split by; called 'theme' below, whatever the column holds
    subject: what the column represents, used in the title, legend and filename
    filenameextra: appended to the title and the filename
    cutoff: themes with fewer changesets in total get no layer of their own,
        they are summed into the 'other' layer
    cumulative: if True, draw filled areas of running totals over time;
        if False, draw bars with the count per day
    sort: True to order the themes by ascending total, None to keep the order
        of first appearance, or a key function which receives ``(theme, total)``.
        NOTE(review): False is neither: it ends up as ``key=False`` in the sort.
    """
    print("Creating graph about " + subject + filenameextra)
    # themes: day (datetime) -> the theme of every changeset of that day
    themes = Hist("date")
    # dates_per_theme: theme -> the day of every changeset with that theme
    dates_per_theme = Hist("theme")
    all_themes = set()
    for row in contents:
        th = row[index]
        all_themes.add(th)
        themes.add(as_date(row[0]), th)
        dates_per_theme.add(th, row[0])
    per_theme_count = list(zip(dates_per_theme.keys(), dates_per_theme.map(len)))
    # With sort == True, per_theme_count has the LEAST popular theme first (ascending totals);
    # values_to_show is reversed further down, which puts the highest stack first
    if sort == True:
        per_theme_count.sort(key=lambda kv: kv[1], reverse=False)
    elif sort is not None:
        per_theme_count.sort(key=sort)
    values_to_show = [] # (theme name, value to fill between - this is stacked, with the first layer to print last)
    running_totals = None
    other_total = 0
    other_theme_count = 0
    other_cumul = None
    for kv in per_theme_count:
        theme = kv[0]
        total_for_this_theme = kv[1]
        # One number per day: the changesets of this theme (up to that day when cumulative)
        if cumulative:
            edits_per_day_cumul = themes.mapcumul(
                lambda themes_for_date: len([x for x in themes_for_date if theme == x]),
                lambda a, b: a + b, 0)
        else:
            edits_per_day_cumul = themes.map(lambda themes_for_date: len([x for x in themes_for_date if theme == x]))
        # Layers are only stacked on top of each other in cumulative mode;
        # otherwise every theme is plotted with its own counts
        if (not cumulative) or (running_totals is None):
            running_totals = edits_per_day_cumul
        else:
            running_totals = list(map(lambda ab: ab[0] + ab[1], zip(running_totals, edits_per_day_cumul)))
        if total_for_this_theme >= cutoff:
            values_to_show.append((theme, running_totals))
        else:
            # Small theme: no layer of its own. Note that, in cumulative mode, it was
            # already added to running_totals above and thus lifts all later layers
            other_total += total_for_this_theme
            other_theme_count += 1
            if other_cumul is None:
                other_cumul = edits_per_day_cumul
            else:
                other_cumul = list(map(lambda ab: ab[0] + ab[1], zip(other_cumul, edits_per_day_cumul)))
    keys = list(themes.keys())
    # Draw the highest stack first, so that the lower layers are painted over it
    values_to_show.reverse()
    values_to_show.append(("other", other_cumul))
    totals = dict(per_theme_count)
    total = sum(totals.values())
    totals["other"] = other_total
    pyplot_init()
    for kv in values_to_show:
        if kv[1] is None:
            continue # No 'other' graph
        msg = kv[0] + " (" + str(totals[kv[0]]) + ")"
        if kv[0] == "other":
            msg = str(other_theme_count) + " small " + subject + "s (" + str(other_total) + " changes)"
        if cumulative:
            pyplot.fill_between(keys, kv[1], label=msg)
        else:
            pyplot.bar(keys, kv[1], label=msg)
    if cumulative:
        cumulative_txt = "Cumulative changesets"
    else:
        cumulative_txt = "Changesets"
    pyplot.title(cumulative_txt + " per " + subject + filenameextra + " (" + str(total) + " changesets)")
    if (useLegend):
        pyplot.legend(loc="upper left", ncol=3)
    pyplot.savefig(cumulative_txt + " per " + subject + filenameextra + ".png")
def contents_where(contents, index, starts_with, invert=False):
    """Lazily yield the rows whose column *index* starts with *starts_with*.

    invert: if True, yield the rows which do NOT start with it instead.
    """
    yield from (
        row
        for row in contents
        if row[index].startswith(starts_with) is not invert
    )
def sortable_user_number(kv):
    """Build a sort key from the numbers in ``kv[0]``.

    All runs of digits are taken from the text, single digits are padded
    with a leading zero and the results are joined with dots,
    e.g. ``"0.8.3"`` becomes ``"00.08.03"``.
    """
    numbers = re.findall("[0-9]+", kv[0])
    padded = [number if len(number) >= 2 else "0" + number for number in numbers]
    return ".".join(padded)
def create_graphs(contents):
    """Create all the graphs: first over all rows, then once more per year since 2020.

    contents: list of rows. As labelled by the calls below, column 0 is the
        day, 1 the contributor, 2 the language, 3 the theme, 4 the version
        number and 8 the host.
    """
    # summed_changes_per(contents, "")
    create_contributors_per_total_cs(contents)
    create_contributors_per_total_cs(contents, per_day=True)

    cumulative_changes_per(contents, 4, "version number", cutoff=1, sort=sortable_user_number)
    create_usercount_graphs(contents)
    create_theme_breakdown(contents)
    for column, subject, least_needed in ((3, "created element", 10), (3, "theme", 10)):
        cumulative_changes_per(contents, column, subject, cutoff=least_needed)
    cumulative_changes_per(contents, 3, "theme", cutoff=10, cumulative=False)
    for column, subject, least_needed in ((1, "contributor", 15), (2, "language", 1), (8, "host", 1)):
        cumulative_changes_per(contents, column, subject, cutoff=least_needed)

    this_year = datetime.now().year
    for year in range(2020, this_year + 1):
        year_text = str(year)
        # The days start with the year
        rows_of_year = list(contents_where(contents, 0, year_text))
        extratext = " in " + year_text
        create_contributors_per_total_cs(rows_of_year, extratext)
        create_contributors_per_total_cs(rows_of_year, extratext, per_day=True)
        create_usercount_graphs(rows_of_year, extratext)
        create_theme_breakdown(rows_of_year, extratext)
        cumulative_changes_per(rows_of_year, 3, "theme", extratext, cutoff=5)
        cumulative_changes_per(rows_of_year, 3, "theme", extratext, cutoff=5, cumulative=False)
        cumulative_changes_per(rows_of_year, 1, "contributor", extratext, cutoff=10)
        cumulative_changes_per(rows_of_year, 2, "language", extratext, cutoff=1)
        cumulative_changes_per(rows_of_year, 4, "version number", extratext, cutoff=1, cumulative=False,
                               sort=sortable_user_number)
        cumulative_changes_per(rows_of_year, 4, "version number", extratext, cutoff=1, sort=sortable_user_number)
        cumulative_changes_per(rows_of_year, 8, "host", extratext, cutoff=1)
        # summed_changes_per(contents_filtered, "for year " + str(year))
def create_per_theme_graphs(contents, cutoff=10):
    """Draw, for every sufficiently used theme, the cumulative changes per contributor.

    Rows are grouped in a single pass on the exact value of column 3 (the theme),
    so a theme whose name is a prefix of another theme's name (``aed`` versus
    ``aed_something``) does not absorb the other theme's rows. A graph is only
    drawn for themes that have at least ``cutoff`` rows and at least two distinct
    contributors (column 1).

    :param contents: iterable of changeset rows; column 1 is read as the contributor,
        column 3 as the theme
    :param cutoff: themes with fewer rows than this are skipped
    """
    rows_per_theme = {}
    for row in contents:
        rows_per_theme.setdefault(row[3], []).append(row)

    for theme, rows in rows_per_theme.items():
        if len(rows) < cutoff:
            # too few changesets for this theme - we do not map it
            continue
        contributors = {row[1] for row in rows}
        if len(contributors) >= 2:
            cumulative_changes_per(rows, 1, "contributor", " for theme " + theme, cutoff=1)
        # if len(filtered) > 25:
        # summed_changes_per(filtered, "for theme " + theme)
def create_per_contributor_graphs(contents, least_needed_changesets):
    """Draw, for every sufficiently active contributor, the cumulative changes per theme.

    Rows are grouped in a single pass on the exact value of column 1 (the
    contributor), so a contributor whose name is a prefix of another name does not
    absorb the other contributor's rows. A graph is only drawn for contributors
    that have at least ``least_needed_changesets`` rows and that touched at least
    two distinct themes (column 3); contributors below the threshold are reported
    on stdout.

    :param contents: iterable of changeset rows; column 1 is read as the contributor,
        column 3 as the theme
    :param least_needed_changesets: contributors with fewer rows than this are skipped
    """
    rows_per_contributor = {}
    for row in contents:
        rows_per_contributor.setdefault(row[1], []).append(row)

    for contrib, rows in rows_per_contributor.items():
        if len(rows) < least_needed_changesets:
            print("Skipping " + contrib + " - too little changesets")
            continue
        themes = {row[3] for row in rows}
        if len(themes) >= 2:
            cumulative_changes_per(rows, 3, "theme", " for contributor " + contrib, cutoff=1)
        # if len(filtered) > 25:
        # summed_changes_per(filtered, "for contributor " + contrib)
# Maps alternative, legacy or misspelled theme identifiers onto the theme name
# used in the statistics. clean_input lowercases the theme before looking it up,
# so the keys that contain uppercase characters can never match; they are kept
# as-is for reference.
theme_remappings = {
    "metamap": "maps",
    "groen": "buurtnatuur",
    "updaten van metadata met mapcomplete": "buurtnatuur",
    "Toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur",
    "wiki:mapcomplete/fritures": "fritures",
    "wiki:MapComplete/Fritures": "fritures",
    "lits": "lit",
    "pomp": "cyclofix",
    "wiki:user:joost_schouppe/campersite": "campersite",
    "wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki-user-joost_schouppe-campersite": "campersite",
    "wiki-User-joost_schouppe-campersite": "campersite",
    "wiki-User-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki:User:joost_schouppe/campersite": "campersite",
    "arbres": "arbres_llefia",
    "aed_brugge": "aed",
    "https://llefia.org/arbres/mapcomplete.json": "arbres_llefia",
    "https://llefia.org/arbres/mapcomplete1.json": "arbres_llefia",
    "toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur",
    "testing mapcomplete 0.0.0": "buurtnatuur",
    "https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes"
}


def clean_input(contents):
    """Normalise raw CSV rows into cleaned rows, one yielded list per input row.

    For every row:

    * column 3 (theme) is stripped of whitespace and double quotes and lowercased;
      when it is ``null`` the theme is taken from column 7 (the comment) instead:
      everything after the last ``#`` up to, but excluding, the final character;
    * the theme is replaced through ``theme_remappings`` when it is listed there,
      and if it still contains a ``/`` past its first character only the part
      after the last ``/`` is kept;
    * column 4 (editor) loses its ``MapComplete `` prefix and is reduced to the
      first ``<digits>.<digits>.<digits>`` match;
    * every column is stripped of surrounding whitespace and double quotes;
    * columns 5 and 6 are converted to ``int``.

    The input rows are left untouched; a new list is yielded for each of them.

    :param contents: iterable of rows (sequences of strings) with at least 8 columns
    :raises IndexError: if a row is too short or column 4 holds no version number
    :raises ValueError: if column 5 or 6 is not an integer
    """
    # Raw string: "\." in a normal string literal is an invalid escape sequence
    version_pattern = re.compile(r"[0-9]*\.[0-9]*\.[0-9]*")
    editor_prefix_length = len("MapComplete ")

    for row in contents:
        theme = row[3].strip().strip('"').lower()
        if theme == "null":
            # The theme metadata has only been set later on - we fetch this from the comment
            # NOTE(review): the slice drops the last character, presumably the
            # closing quote of the comment field - confirm against the data
            comment = row[7]
            theme = comment[comment.rfind("#") + 1:-1].lower()
        theme = theme_remappings.get(theme, theme)
        last_slash = theme.rfind("/")
        if last_slash > 0:
            theme = theme[last_slash + 1:]

        editor = row[4].strip().strip('"')[editor_prefix_length:]
        version = version_pattern.findall(editor)[0]

        cleaned = list(row)
        cleaned[3] = theme
        cleaned[4] = version
        cleaned = [field.strip().strip('"') for field in cleaned]
        cleaned[5] = int(cleaned[5])
        cleaned[6] = int(cleaned[6])
        yield cleaned
def mergeChangesets(contents):
    """Collapse bursts of changesets so that the stats are comparable.

    A row is yielded when it is the first one seen for its (contributor, theme)
    pair, or when it comes more than one hour after the previous row of that same
    pair. Other rows are dropped. The reference time moves along with every row,
    dropped or not, so a steady stream of changes less than an hour apart yields
    only its first row.

    :param contents: iterable of rows; column 0 is a ``%Y-%m-%dT%H:%M:%SZ`` timestamp,
        column 1 the contributor and column 3 the theme
    """
    previous_change = {}  # {(contributor, theme) --> time of the last row seen}
    for row in contents:
        key = (row[1], row[3])
        moment = datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")
        earlier = previous_change.get(key)
        previous_change[key] = moment
        if earlier is None or (moment - earlier).total_seconds() > 60 * 60:
            yield row
def datesOnly(contents):
    """Cut the timestamp in column 0 of every row down to the part before the first ``T``.

    The rows are modified in place; nothing is returned. A value without a ``T``
    is left as it is.
    """
    for entry in contents:
        date_part, _, _ = entry[0].partition("T")
        entry[0] = date_part
# Debug helper: prints a 'New <item> <n>: <value>' line the first time each
# distinct value of column `index` is encountered in `stats`.
#   stats: iterable of rows
#   index: column whose distinct values are reported (default 1)
#   item:  label used in the printed message (default "contributor")
# Nothing is returned. The value is concatenated into the message, so it must be a string.
def contributor_count(stats, index=1, item="contributor"):
seen_contributors = set()
for line in stats:
contributor = line[index]
if (contributor in seen_contributors):
# already reported
continue
# <n> is the number of distinct values seen so far, counting this one
print("New " + item + " " + str(len(seen_contributors) + 1) + ": " + contributor)
seen_contributors.add(contributor)
# NOTE(review): indentation was lost in this view, so it is unclear whether the
# row is printed for every new value (inside the loop) or only once after the
# loop, which would fail on empty input - confirm against the original file.
print(line)
def main():
    """Read ``stats.csv`` from the working directory and generate all graphs.

    The rows are cleaned, bursts of changesets are collapsed, the timestamps are
    reduced to dates, and the overall and per-theme graphs are drawn.
    """
    print("Creating graphs...")
    # The encoding is given explicitly so that contributor names are decoded the
    # same way on every platform instead of depending on the locale.
    # NOTE(review): presumably UTF-8, as the file is assembled from jq output - confirm.
    with open('stats.csv', newline='', encoding='utf-8') as csvfile:
        # list(...) reads the whole file, so it can be closed before plotting starts
        stats = list(clean_input(csv.reader(csvfile, delimiter=',', quotechar='"')))
    stats = list(mergeChangesets(stats))
    datesOnly(stats)
    print("Found " + str(len(stats)) + " changesets")
    # contributor_count(stats, 3, "theme")
    create_graphs(stats)
    create_per_theme_graphs(stats, 15)
    # create_per_contributor_graphs(stats, 25)
    print("All done!")


# Runs unconditionally, also when this file is imported
main()

View file

@ -1,22 +0,0 @@
#! /bin/bash
# Compiles the downloaded stats.*.json pages into stats.csv.
# Every feature becomes one line with the columns:
#   date, user, language, theme, editor, create, modify, comment, host
# The result is appended to the archived stats2020.csv and stats2021Q1.csv.

if [[ ! -e stats.1.json ]]
then
    echo "No stats found - not compiling"
    exit
fi

# -f: on a first run there is no previous stats.csv to remove
rm -f stats.csv
# echo "date, username, language, theme, editor, creations, changes" > stats.csv
echo "" > tmp.csv
for f in stats.*.json
do
    echo "$f"
    # jq prints one JSON array per feature; the arrays are flattened onto one line each
    # and the brackets are removed
    jq ".features[].properties | [.date, .user, .metadata.language, .metadata.theme, .editor, .create, .modify, .comment, .metadata.host]" "$f" | tr -d "\n" | sed "s/]\[/\n/g" | tr -d "][" >> tmp.csv
    echo "" >> tmp.csv
done

# Drop empty lines and the leading space, then sort (the lines start with the date)
# NOTE(review): as written, "s/ / /g" replaces a space by a space and does nothing;
# presumably it was meant to collapse double spaces - confirm against the original file.
sed "/^$/d" tmp.csv | sed "s/^ //" | sed "s/ / /g" | sort > stats-latest.csv
cat stats2020.csv stats2021Q1.csv stats-latest.csv > stats.csv
rm tmp.csv stats-latest.csv

View file

@ -1,26 +0,0 @@
#! /bin/bash
# Downloads all MapComplete changesets from 2021-07-01 until now from the OSMCha API,
# one page of at most 1000 changesets per stats.<page>.json file.
# Usage: pass the page number to start at as optional first argument (default 1).

# Current moment, URL-encoded ("%20" is a space, "%3A" a colon)
DATE=$(date +"%Y-%m-%d%%20%H%%3A%M")
COUNTER=1
if [[ $1 != "" ]]
then
    echo "Starting at $1"
    COUNTER="$1"
fi

# NOTE(review): this API token is committed in the source; it should be rotated and
# supplied through the OSMCHA_TOKEN environment variable only.
TOKEN="${OSMCHA_TOKEN:-6e422e2afedb79ef66573982012000281f03dc91}"

NEXT_URL="https://osmcha.org/api/v1/changesets/?date__gte=2021-07-01&date__lte=$DATE&editor=mapcomplete&page=$COUNTER&page_size=1000"
# -f: there may be no earlier download to remove
rm -f stats.*.json
# The API reports the URL of the following page in ".next", which is null on the last page
while [[ "$NEXT_URL" != "null" ]]
do
    echo "$COUNTER '$NEXT_URL'"
    # curl is called directly: wrapping it in $(...) would execute whatever it prints.
    # --fail turns an HTTP error status into a non-zero exit code.
    if curl "$NEXT_URL" --silent --fail -H 'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0' -H 'Accept: */*' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D' -H 'Content-Type: application/json' -H "Authorization: Token $TOKEN" -H 'DNT: 1' -H 'Connection: keep-alive' -H 'TE: Trailers' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -o "stats.$COUNTER.json"
    then
        NEXT_URL=$(jq -r ".next" "stats.$COUNTER.json")
        COUNTER=$((COUNTER + 1))
    else
        echo "Something failed - exiting now"
        exit 1
    fi
done

Binary file not shown.

After

Width:  |  Height:  |  Size: 231 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 477 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 512 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 175 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 374 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 328 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 433 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 348 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 416 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 145 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 138 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more