Fancy stats!
30
Docs/Stats.md
Normal file
|
@ -0,0 +1,30 @@
|
|||
Statistics
|
||||
==========
|
||||
|
||||
There are some fancy statistics available about MapComplete usage. You can find all the stats (and the scripts to generate them) [here](Docs/Tools/).
|
||||
|
||||
All Time usage
|
||||
--------------
|
||||
|
||||
![Cumulative contributors](Docs/Tools/CumulativeContributors.png)
|
||||
![Cumulative changesets per contributor](Docs/Tools/Cumulative%20changesets%20per%20contributor.png)
|
||||
|
||||
Note: in 2020, MapComplete would still make one changeset per answered question. This heavily skews the graphs below towards `buurtnatuur` and `cyclofix`, two heavily used themes at the beginning.
|
||||
![Cumulative changesets per theme](Docs/Tools/Cumulative%20changesets%20per%20theme.png)
|
||||
![Theme distribution](Docs/Tools/Theme%20distribution.png)
|
||||
|
||||
2020
|
||||
----
|
||||
|
||||
![Cumulative contributors](Docs/Tools/CumulativeContributors%20in%202020.png)
|
||||
![Cumulative changesets per contributor](Docs/Tools/Cumulative%20changesets%20per%20contributor%20in%202020.png)
|
||||
![Cumulative changesets per theme](Docs/Tools/Cumulative%20changesets%20per%20theme%20in%202020.png)
|
||||
![Theme distribution](Docs/Tools/Theme%20distribution%20in%202020.png)
|
||||
|
||||
2021
|
||||
----
|
||||
|
||||
![Cumulative contributors](Docs/Tools/CumulativeContributors%20in%202021.png)
|
||||
![Cumulative changesets per contributor](Docs/Tools/Cumulative%20changesets%20per%20contributor%20in%202021.png)
|
||||
![Cumulative changesets per theme](Docs/Tools/Cumulative%20changesets%20per%20theme%20in%202021.png)
|
||||
![Theme distribution](Docs/Tools/Theme%20distribution%20in%202021.png)
|
BIN
Docs/Tools/Changesets per version number in 2020.png
Normal file
After Width: | Height: | Size: 388 KiB |
BIN
Docs/Tools/Changesets per version number in 2021.png
Normal file
After Width: | Height: | Size: 341 KiB |
Before Width: | Height: | Size: 496 KiB After Width: | Height: | Size: 493 KiB |
Before Width: | Height: | Size: 644 KiB After Width: | Height: | Size: 645 KiB |
BIN
Docs/Tools/Cumulative changesets per contributor in 2020.png
Normal file
After Width: | Height: | Size: 708 KiB |
BIN
Docs/Tools/Cumulative changesets per contributor in 2021.png
Normal file
After Width: | Height: | Size: 217 KiB |
BIN
Docs/Tools/Cumulative changesets per contributor.png
Normal file
After Width: | Height: | Size: 602 KiB |
BIN
Docs/Tools/Cumulative changesets per language in 2020.png
Normal file
After Width: | Height: | Size: 104 KiB |
BIN
Docs/Tools/Cumulative changesets per language in 2021.png
Normal file
After Width: | Height: | Size: 102 KiB |
BIN
Docs/Tools/Cumulative changesets per language.png
Normal file
After Width: | Height: | Size: 109 KiB |
BIN
Docs/Tools/Cumulative changesets per theme in 2020.png
Normal file
After Width: | Height: | Size: 279 KiB |
BIN
Docs/Tools/Cumulative changesets per theme in 2021.png
Normal file
After Width: | Height: | Size: 301 KiB |
BIN
Docs/Tools/Cumulative changesets per theme.png
Normal file
After Width: | Height: | Size: 310 KiB |
BIN
Docs/Tools/Cumulative changesets per version number in 2020.png
Normal file
After Width: | Height: | Size: 528 KiB |
BIN
Docs/Tools/Cumulative changesets per version number in 2021.png
Normal file
After Width: | Height: | Size: 224 KiB |
BIN
Docs/Tools/Cumulative changesets per version number.png
Normal file
After Width: | Height: | Size: 710 KiB |
Before Width: | Height: | Size: 273 KiB After Width: | Height: | Size: 272 KiB |
Before Width: | Height: | Size: 269 KiB After Width: | Height: | Size: 270 KiB |
Before Width: | Height: | Size: 535 KiB After Width: | Height: | Size: 599 KiB |
Before Width: | Height: | Size: 550 KiB After Width: | Height: | Size: 551 KiB |
Before Width: | Height: | Size: 760 KiB After Width: | Height: | Size: 758 KiB |
|
@ -4,10 +4,6 @@ from datetime import datetime
|
|||
from matplotlib import pyplot
|
||||
|
||||
|
||||
def clean(s):
|
||||
return s.strip().strip("\"")
|
||||
|
||||
|
||||
def counts(lst):
|
||||
counts = {}
|
||||
for v in lst:
|
||||
|
@ -75,21 +71,25 @@ class Hist:
|
|||
csv += "\n"
|
||||
return csv
|
||||
|
||||
def __str__(self):
|
||||
return str(self.dictionary)
|
||||
|
||||
def build_hist(stats, keyIndex, valueIndex, condition=None):
|
||||
|
||||
def build_hist(stats, keyIndex, valueIndex):
|
||||
hist = Hist("date")
|
||||
c = 0
|
||||
for row in stats:
|
||||
if condition is not None and not condition(row):
|
||||
continue
|
||||
c += 1
|
||||
row = list(map(clean, row))
|
||||
hist.add(row[keyIndex], row[valueIndex])
|
||||
return hist
|
||||
|
||||
|
||||
def cumulative_users(stats, year=""):
|
||||
users_hist = build_hist(stats, 0, 1, lambda row: row[0].startswith(year))
|
||||
def as_date(str):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime``.

    The parameter keeps its original name (which shadows the builtin ``str``
    inside this function) so that existing callers are unaffected.

    :param str: date text in ``%Y-%m-%d`` form
    :return: the ``datetime`` produced by ``datetime.strptime``
    :raises ValueError: when the text does not match the format
    """
    day_format = "%Y-%m-%d"
    parsed = datetime.strptime(str, day_format)
    return parsed
|
||||
|
||||
|
||||
def cumulative_users(stats):
|
||||
users_hist = build_hist(stats, 0, 1)
|
||||
all_users_per_day = users_hist.mapcumul(
|
||||
lambda users: set(users),
|
||||
lambda a, b: a.union(b),
|
||||
|
@ -100,7 +100,7 @@ def cumulative_users(stats, year=""):
|
|||
new_users = [0]
|
||||
for i in range(len(cumul_uniq) - 1):
|
||||
new_users.append(cumul_uniq[i + 1] - cumul_uniq[i])
|
||||
dates = map(lambda dt: datetime.strptime(dt, "%Y-%m-%d"), users_hist.keys())
|
||||
dates = map(as_date, users_hist.keys())
|
||||
return list(dates), cumul_uniq, list(unique_per_day), list(new_users)
|
||||
|
||||
|
||||
|
@ -110,66 +110,35 @@ def pyplot_init():
|
|||
pyplot.tight_layout()
|
||||
|
||||
|
||||
def create_usercount_graphs(stats, year="", show=False):
|
||||
print("Creating usercount graphs "+year)
|
||||
dates, cumul_uniq, unique_per_day, new_users = cumulative_users(stats, year)
|
||||
def create_usercount_graphs(stats, extra_text=""):
|
||||
print("Creating usercount graphs " + extra_text)
|
||||
dates, cumul_uniq, unique_per_day, new_users = cumulative_users(stats)
|
||||
total = cumul_uniq[-1]
|
||||
|
||||
if year != "":
|
||||
year = " in " + year
|
||||
pyplot_init()
|
||||
pyplot.fill_between(dates, unique_per_day, label='Unique contributors')
|
||||
pyplot.fill_between(dates, new_users, label='First time contributor via MapComplete')
|
||||
pyplot.legend()
|
||||
pyplot.title("Unique contributors" + year + ' with MapComplete (' + str(total) + ' contributors)')
|
||||
pyplot.title("Unique contributors" + extra_text + ' with MapComplete (' + str(total) + ' contributors)')
|
||||
pyplot.ylabel("Number of unique contributors")
|
||||
pyplot.xlabel("Date")
|
||||
if show:
|
||||
pyplot.show()
|
||||
else:
|
||||
pyplot.savefig("Contributors" + year + ".png", dpi=400, facecolor='w', edgecolor='w', bbox_inches='tight')
|
||||
pyplot.savefig("Contributors" + extra_text + ".png", dpi=400, facecolor='w', edgecolor='w', bbox_inches='tight')
|
||||
|
||||
pyplot_init()
|
||||
pyplot.plot(dates, cumul_uniq, label='Cumulative unique contributors')
|
||||
pyplot.legend()
|
||||
pyplot.title("Cumulative unique contributors" + year + " with MapComplete - " + str(total) + " contributors")
|
||||
pyplot.title("Cumulative unique contributors" + extra_text + " with MapComplete - " + str(total) + " contributors")
|
||||
pyplot.ylabel("Number of unique contributors")
|
||||
pyplot.xlabel("Date")
|
||||
if show:
|
||||
pyplot.show()
|
||||
else:
|
||||
pyplot.savefig("CumulativeContributors" + year + ".png", dpi=400, facecolor='w', edgecolor='w',
|
||||
bbox_inches='tight')
|
||||
pyplot.savefig("CumulativeContributors" + extra_text + ".png", dpi=400, facecolor='w', edgecolor='w',
|
||||
bbox_inches='tight')
|
||||
|
||||
|
||||
def create_yearly_usercount_graphs(contents):
|
||||
create_usercount_graphs(contents)
|
||||
currentYear = datetime.now().year
|
||||
for year in range(2020, currentYear + 1):
|
||||
create_usercount_graphs(contents, str(year))
|
||||
|
||||
|
||||
theme_remappings = {
|
||||
"null": "buurtnatuur",
|
||||
"metamap": "maps",
|
||||
"wiki:mapcomplete/fritures": "fritures",
|
||||
"lits": "lit",
|
||||
"wiki:user:joost_schouppe/campersite": "campersite",
|
||||
"wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
|
||||
"wiki-user-joost_schouppe-campersite":"campersites",
|
||||
"https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes"
|
||||
}
|
||||
|
||||
|
||||
def create_theme_breakdown(stats, year="", user=None, columnIndex=3):
|
||||
print("Creating theme breakdown "+year)
|
||||
def create_theme_breakdown(stats, fileExtra="", cutoff=5):
|
||||
print("Creating theme breakdown " + fileExtra)
|
||||
themeCounts = {}
|
||||
for row in stats:
|
||||
if not row[0].startswith(year):
|
||||
continue
|
||||
if user is not None and clean(row[1]) != user:
|
||||
continue
|
||||
theme = clean(row[columnIndex]).lower()
|
||||
theme = row[3].lower()
|
||||
if theme in theme_remappings:
|
||||
theme = theme_remappings[theme]
|
||||
if theme in themeCounts:
|
||||
|
@ -178,55 +147,157 @@ def create_theme_breakdown(stats, year="", user=None, columnIndex=3):
|
|||
themeCounts[theme] = 1
|
||||
themes = list(themeCounts.items())
|
||||
if len(themes) == 0:
|
||||
print("No entries found for user "+user+" in "+year)
|
||||
print("No entries found for theme breakdown (extra: " + str(fileExtra) + ")")
|
||||
return
|
||||
themes.sort(key=lambda kv : kv[1], reverse=True)
|
||||
|
||||
cutoff = 5
|
||||
if user is not None:
|
||||
cutoff = 0
|
||||
themes.sort(key=lambda kv: kv[1], reverse=True)
|
||||
other_count = sum([theme[1] for theme in themes if theme[1] < cutoff])
|
||||
themes_filtered = [theme for theme in themes if theme[1] >= cutoff]
|
||||
keys = list(map(lambda kv : kv[0] + " (" + str(kv[1])+")", themes_filtered))
|
||||
values = list(map(lambda kv : kv[1], themes_filtered))
|
||||
total =sum(map(lambda kv:kv[1], themes))
|
||||
keys = list(map(lambda kv: kv[0] + " (" + str(kv[1]) + ")", themes_filtered))
|
||||
values = list(map(lambda kv: kv[1], themes_filtered))
|
||||
total = sum(map(lambda kv: kv[1], themes))
|
||||
first_pct = themes[0][1] / total;
|
||||
if year != "":
|
||||
year = " in " + year
|
||||
|
||||
if other_count > 0:
|
||||
keys.append("other")
|
||||
values.append(other_count)
|
||||
pyplot_init()
|
||||
pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct/2))
|
||||
if user is None:
|
||||
user = ""
|
||||
else:
|
||||
user = " by contributor "+user
|
||||
pyplot.title("MapComplete changes per theme"+year+user+ " - "+str(total)+" total changes")
|
||||
pyplot.savefig("Theme distribution" + user+year + ".png", dpi=400, facecolor='w', edgecolor='w',
|
||||
pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct / 2))
|
||||
pyplot.title("MapComplete changes per theme" + fileExtra + " - " + str(total) + " total changes")
|
||||
pyplot.savefig("Theme distribution" + fileExtra + ".png", dpi=400, facecolor='w', edgecolor='w',
|
||||
bbox_inches='tight')
|
||||
return themes
|
||||
|
||||
def gen_theme_breakdown_graphs(contents, user=None):
|
||||
create_theme_breakdown(contents, "", user)
|
||||
|
||||
def cumulative_changes_per(contents, index, subject, filenameextra="", cutoff=5, cumulative=True):
    """Plot changesets per value of one column as stacked areas and save the PNG.

    Rows are grouped by ``row[index]``. A group with at least ``cutoff`` rows
    gets its own layer; all smaller groups are merged into one "other" layer.
    The image is written to
    ``"<Cumulative changesets|Changesets> per <subject><filenameextra>.png"``.

    NOTE(review): ``Hist`` is defined elsewhere in this file; this function
    relies on ``keys()``, ``map()`` and ``mapcumul()`` returning their results
    in the same order - confirm against the class.

    :param contents: iterable of rows, consumed once; ``row[0]`` is a
        ``YYYY-MM-DD`` date string and ``row[index]`` the grouping value
    :param index: column to group by
    :param subject: noun used in the log line, title, legend and file name
    :param filenameextra: text appended to the title and the file name
    :param cutoff: minimum number of rows for a group to get its own layer
    :param cumulative: if true, layers are built with ``mapcumul`` (running
        sum); otherwise with ``map`` (count per date)
    """
    print("Creating graph about " + subject + filenameextra)
    themes = Hist("date")
    dates_per_theme = Hist("theme")
    for row in contents:
        th = row[index]
        themes.add(as_date(row[0]), th)
        dates_per_theme.add(th, row[0])
    per_theme_count = list(zip(dates_per_theme.keys(), dates_per_theme.map(len)))
    # Ascending sort: the least used value comes first, so the running totals
    # below grow from the smallest group up to the most popular one.
    per_theme_count.sort(key=lambda kv: kv[1], reverse=False)
    values_to_show = []  # (theme name, value to fill between - this is stacked, with the first layer to print last)
    running_totals = None
    other_total = 0
    other_theme_count = 0
    other_cumul = None

    for kv in per_theme_count:
        theme = kv[0]
        total_for_this_theme = kv[1]
        if cumulative:
            edits_per_day_cumul = themes.mapcumul(
                lambda themes_for_date: len([x for x in themes_for_date if theme == x]),
                lambda a, b: a + b, 0)
        else:
            edits_per_day_cumul = themes.map(lambda themes_for_date: len([x for x in themes_for_date if theme == x]))

        # Every group, shown or not, is added to the stack height.
        if running_totals is None:
            running_totals = edits_per_day_cumul
        else:
            running_totals = list(map(lambda ab: ab[0] + ab[1], zip(running_totals, edits_per_day_cumul)))

        if total_for_this_theme >= cutoff:
            values_to_show.append((theme, running_totals))
        else:
            other_total += total_for_this_theme
            other_theme_count += 1
            if other_cumul is None:
                other_cumul = edits_per_day_cumul
            else:
                other_cumul = list(map(lambda ab: ab[0] + ab[1], zip(other_cumul, edits_per_day_cumul)))

    keys = themes.keys()
    # Draw the tallest stack first so that the lower stacks are painted over it.
    values_to_show.reverse()
    values_to_show.append(("other", other_cumul))
    totals = dict(per_theme_count)
    total = sum(totals.values())
    totals["other"] = other_total

    pyplot_init()
    for kv in values_to_show:
        if kv[1] is None:
            continue  # No 'other' graph
        msg = kv[0] + " (" + str(totals[kv[0]]) + ")"
        if kv[0] == "other":
            msg = str(other_theme_count) + " small " + subject + "s (" + str(other_total) + " changes)"
        pyplot.fill_between(keys, kv[1], label=msg)

    if cumulative:
        cumulative_txt = "Cumulative changesets"
    else:
        cumulative_txt = "Changesets"
    pyplot.title(cumulative_txt + " per " + subject + filenameextra + " (" + str(total) + " changesets)")
    pyplot.legend(loc="upper left", ncol=3)
    pyplot.savefig(cumulative_txt + " per " + subject + filenameextra + ".png")
|
||||
|
||||
|
||||
def contents_where(contents, index, starts_with, invert=False):
    """Lazily yield the rows whose ``index``-th field starts with ``starts_with``.

    :param contents: iterable of rows (sequences of strings)
    :param index: position of the field to test in each row
    :param starts_with: prefix handed to ``str.startswith``
    :param invert: when truthy, yield the rows that do NOT match instead
    :return: generator over the selected rows, in input order
    """
    # Compare by value, not identity: `is not invert` only worked for the
    # exact objects True/False and yielded every row for e.g. invert=1.
    keep_matching = not invert
    for row in contents:
        if row[index].startswith(starts_with) == keep_matching:
            yield row
|
||||
|
||||
|
||||
def create_graphs(contents):
    """Generate every statistics image: once for all rows, then once per year.

    The column indices passed below are 1 = contributor, 2 = language,
    3 = theme and 4 = version number; column 0 is the date string used to
    select the rows of a year.

    :param contents: rows to plot; iterated several times, so it must be a
        re-iterable collection such as a list (not a one-shot generator)
    """
    create_usercount_graphs(contents)
    create_theme_breakdown(contents)
    cumulative_changes_per(contents, 3, "theme", cutoff=10)
    cumulative_changes_per(contents, 1, "contributor", cutoff=15)
    cumulative_changes_per(contents, 2, "language", cutoff=1)
    cumulative_changes_per(contents, 4, "version number", cutoff=1)

    currentYear = datetime.now().year
    for year in range(2020, currentYear + 1):
        # Rows whose date string starts with the year, e.g. "2021-...".
        contents_filtered = list(contents_where(contents, 0, str(year)))
        extratext = " in " + str(year)
        create_usercount_graphs(contents_filtered, extratext)
        create_theme_breakdown(contents_filtered, extratext)
        cumulative_changes_per(contents_filtered, 3, "theme", extratext, cutoff=5)
        cumulative_changes_per(contents_filtered, 1, "contributor", extratext, cutoff=10)
        cumulative_changes_per(contents_filtered, 2, "language", extratext, cutoff=1)
        # Per-year version numbers are plotted as plain counts, not cumulatively.
        cumulative_changes_per(contents_filtered, 4, "version number", extratext, cutoff=1, cumulative=False)
|
||||
|
||||
def changes_per_theme_daily(contents):
    """Placeholder: creates an empty ``hist`` dict and does nothing else.

    NOTE(review): ``contents`` is never read and nothing is returned in the
    lines visible here - looks unfinished; confirm before relying on it.
    """
    hist = {}
|
||||
|
||||
# Maps alternative theme identifiers onto the single name used in the graphs.
# NOTE(review): the lookups visible in this file lowercase the theme before
# consulting this table, so the keys containing uppercase letters
# ("wiki:MapComplete/Fritures", "wiki-User-...", "wiki:User:...") can never
# match through those paths - confirm whether they are still needed.
theme_remappings = {
    "metamap": "maps",
    "groen": "buurtnatuur",
    "wiki:mapcomplete/fritures": "fritures",
    "wiki:MapComplete/Fritures": "fritures",
    "lits": "lit",
    "pomp": "cyclofix",
    # NOTE(review): this key maps to "campersite" while the other campersite
    # keys below map to "campersites" - confirm which name is intended.
    "wiki:user:joost_schouppe/campersite": "campersite",
    "wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki-user-joost_schouppe-campersite": "campersites",
    "wiki-User-joost_schouppe-campersite": "campersites",
    "wiki-User-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki:User:joost_schouppe/campersite": "campersites",
    "https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes"
}
|
||||
|
||||
|
||||
def clean_input(contents):
    """Yield a cleaned copy of every row with a normalised theme in column 3.

    Each field is stripped of surrounding whitespace and double quotes. The
    theme (column 3) is additionally lowercased and passed through
    ``theme_remappings``. When the theme is ``null`` it is taken from the text
    after the last ``#`` of the comment (column 7), minus its final character.

    Empty rows (e.g. produced by blank lines in the CSV) are skipped instead of
    raising ``IndexError``, and the input rows are no longer modified in place.

    :param contents: iterable of rows (lists of strings)
    :return: generator of new lists, one per non-empty input row
    """
    for row in contents:
        if not row:
            continue
        theme = row[3].strip().strip("\"").lower()
        if theme == "null":
            # The theme metadata has only been set later on - we fetch this from the comment
            comment = row[7]
            # NOTE(review): the [:-1] presumably drops a trailing quote; a
            # comment without '#' ends up used (almost) whole - confirm.
            theme = comment[comment.rfind("#") + 1:-1].lower()
        theme = theme_remappings.get(theme, theme)
        cleaned = [data.strip().strip("\"") for data in row]
        # Same stripping as every other field, applied to the resolved theme.
        cleaned[3] = theme.strip().strip("\"")
        yield cleaned
|
||||
|
||||
|
||||
def main():
|
||||
print("Creating graphs...")
|
||||
with open('stats.csv', newline='') as csvfile:
|
||||
stats = list(csv.reader(csvfile, delimiter=',', quotechar='"'))
|
||||
print("Found "+str(len(stats))+" changesets")
|
||||
create_yearly_usercount_graphs(stats)
|
||||
gen_theme_breakdown_graphs(stats)
|
||||
stats = list(clean_input(csv.reader(csvfile, delimiter=',', quotechar='"')))
|
||||
print("Found " + str(len(stats)) + " changesets")
|
||||
create_graphs(stats)
|
||||
print("All done!")
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ echo "" > tmp.csv
|
|||
for f in stats.*.json
|
||||
do
|
||||
echo $f
|
||||
jq ".features[].properties | [.date, .user, .metadata.language, .metadata.theme, .editor, .create, .modify]" "$f" | tr -d "\n" | sed "s/]\[/\n/g" | tr -d "][" >> tmp.csv
|
||||
jq ".features[].properties | [.date, .user, .metadata.language, .metadata.theme, .editor, .create, .modify, .comment]" "$f" | tr -d "\n" | sed "s/]\[/\n/g" | tr -d "][" >> tmp.csv
|
||||
echo "" >> tmp.csv
|
||||
done
|
||||
|
||||
|
|