Add some fancy graphs

This commit is contained in:
pietervdvn 2021-03-10 12:55:27 +01:00
parent f464600ab8
commit c49585a70a
11 changed files with 3120 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

BIN
Docs/Tools/Contributors.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 264 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

6
Docs/Tools/compileStats.sh Executable file
View file

@ -0,0 +1,6 @@
#! /bin/bash
# Regenerates all statistics graphs: downloads the changeset pages,
# flattens them into stats.csv and finally renders the graphs.

# Stop at the first failing step so that a later step never runs on the
# stale or partial output of an earlier one.
set -e

# The steps below are addressed relative to this script, so run from its
# directory regardless of where the caller invoked it.
cd "$(dirname "$0")"

./fetchStats.sh
./csvPerChange.sh
python csvGrapher.py

224
Docs/Tools/csvGrapher.py Normal file
View file

@ -0,0 +1,224 @@
import csv
from datetime import datetime
from matplotlib import pyplot
def clean(s):
    """Strip surrounding whitespace from ``s``, then any leading/trailing double quotes.

    Whitespace that sits *inside* the quotes is left untouched.
    """
    trimmed = s.strip()
    return trimmed.strip('"')
def counts(lst):
    """Return a dict mapping every distinct element of ``lst`` to its number of occurrences.

    Elements must be hashable; keys appear in first-occurrence order.
    """
    tally = {}
    for item in lst:
        tally[item] = tally.get(item, 0) + 1
    return tally
class Hist:
    """Groups values under keys, keeping every value that was added (duplicates included).

    ``dictionary`` maps each key to the list of values added for it. Keys are
    iterated in the order in which they were first added.
    """

    def __init__(self, firstcolumn):
        """Create an empty histogram.

        :param firstcolumn: name of the key column; it is used, wrapped in
            double quotes, as the first header cell of :meth:`csv`.
        """
        self.key = "\"" + firstcolumn + "\""
        self.dictionary = {}

    def add(self, key, value):
        """Append ``value`` to the list of values stored under ``key``."""
        self.dictionary.setdefault(key, []).append(value)

    def values(self):
        """Return every distinct value stored under any key, in first-seen order."""
        # dict.fromkeys de-duplicates while keeping a deterministic order, so
        # repeated calls (and repeated runs) yield the same column order.
        return list(dict.fromkeys(
            value for bucket in self.dictionary.values() for value in bucket
        ))

    def keys(self):
        """Return a view of the keys, in insertion order."""
        return self.dictionary.keys()

    def get(self, key):
        """Return the list of values stored under ``key``, or None if the key is unknown."""
        return self.dictionary.get(key)

    def map(self, f):
        """Return ``[f(values) for each key]``, aligned with :meth:`keys`.

        Only the mapped values are returned; pair them with ``keys()`` for
        use with e.g. ``pyplot.plot``.
        """
        return [f(bucket) for bucket in self.dictionary.values()]

    def mapcumul(self, f, add, zero):
        """Return the running fold of ``f(values)`` over all keys.

        :param f: applied to the list of values of each key
        :param add: combines the running value with ``f(values)``; its result
            becomes the new running value
        :param zero: initial running value
        :return: one running value per key, aligned with :meth:`keys`
        """
        vals = []
        running_value = zero
        for bucket in self.dictionary.values():
            running_value = add(running_value, f(bucket))
            vals.append(running_value)
        return vals

    def csv(self):
        """Render the histogram as CSV text: one column per distinct value, one row per key.

        Every cell holds the number of times that value was added under that
        key. Keys and values are expected to be strings.
        """
        # Function-scope import keeps the class self-contained.
        from collections import Counter

        # Computed once so that the header and every row use the same columns.
        header = self.values()
        lines = [self.key + "," + ",".join(header)]
        for key, bucket in self.dictionary.items():
            tally = Counter(bucket)  # missing values count as 0
            lines.append(",".join([key] + [str(tally[head]) for head in header]))
        return "\n".join(lines) + "\n"
def build_hist(stats, keyIndex, valueIndex, condition=None):
    """Group one column of ``stats`` by another column.

    :param stats: iterable of rows, each a sequence of strings
    :param keyIndex: index of the column that supplies the key
    :param valueIndex: index of the column that supplies the value
    :param condition: optional predicate; it receives the raw (uncleaned) row
        and only rows for which it returns a truthy value are kept.
        ``None`` keeps every row.
    :return: a ``Hist`` holding the cleaned value of every kept row under its
        cleaned key
    """
    hist = Hist("date")
    for row in stats:
        if condition is not None and not condition(row):
            continue
        cleaned = [clean(cell) for cell in row]
        hist.add(cleaned[keyIndex], cleaned[valueIndex])
    return hist
def cumulative_users(stats, year=""):
    """Compute per-day contributor statistics.

    Column 0 of every row is taken as the date (``YYYY-MM-DD``) and column 1
    as the contributor. Only rows whose date starts with ``year`` are used;
    the default ``""`` matches every row.

    :return: four lists aligned per day: the dates as ``datetime`` objects,
        the cumulative number of distinct contributors, the number of distinct
        contributors on that day, and the growth of the cumulative count
        compared to the previous day (the first entry is always 0).
    """
    def in_requested_year(row):
        return row[0].startswith(year)

    per_day = build_hist(stats, 0, 1, in_requested_year)

    # Running union of everybody seen up to and including each day.
    seen_so_far = per_day.mapcumul(
        set,
        lambda known, todays: known.union(todays),
        set()
    )
    cumul_uniq = [len(known) for known in seen_so_far]
    unique_per_day = per_day.map(lambda users: len(set(users)))

    growth = [later - earlier for earlier, later in zip(cumul_uniq, cumul_uniq[1:])]
    new_users = [0] + growth

    dates = [datetime.strptime(day, "%Y-%m-%d") for day in per_day.keys()]
    return dates, cumul_uniq, list(unique_per_day), new_users
def pyplot_init():
    """Open a new 14x8 inch figure at 200 dpi with vertical x tick labels and a tight layout."""
    figure_options = {"figsize": (14, 8), "dpi": 200}
    pyplot.figure(**figure_options)
    pyplot.xticks(rotation='vertical')
    pyplot.tight_layout()
def _show_or_save_figure(filename, show):
    """Display the current figure or write it to ``filename``, then close it."""
    if show:
        pyplot.show()
    else:
        pyplot.savefig(filename, dpi=400, facecolor='w', edgecolor='w', bbox_inches='tight')
    # Figures stay registered with pyplot until closed; close explicitly so
    # that generating many graphs in one run does not accumulate them.
    pyplot.close()


def create_usercount_graphs(stats, year="", show=False):
    """Create the contributor graphs for one year, or for all data.

    Two graphs are produced: a bar chart with the unique and the first-time
    contributors per day, and a line chart with the cumulative number of
    unique contributors.

    :param stats: the rows of stats.csv
    :param year: only rows whose date starts with this string are used;
        ``""`` uses every row
    :param show: when true the graphs are displayed, otherwise they are saved
        as ``Contributors<suffix>.png`` and ``CumulativeContributors<suffix>.png``
        in the working directory, where the suffix is ``" in <year>"`` or empty
    """
    dates, cumul_uniq, unique_per_day, new_users = cumulative_users(stats, year)
    if not cumul_uniq:
        # Nothing matched (e.g. a year without any changeset): there is
        # nothing to plot and no total to report.
        print("No contributions found" + (" in " + year if year != "" else "") + " - no graphs created")
        return

    total = cumul_uniq[-1]
    if year != "":
        year = " in " + year

    pyplot_init()
    pyplot.bar(dates, unique_per_day, label='Unique contributors')
    pyplot.bar(dates, new_users, label='First time contributor via MapComplete')
    pyplot.legend()
    pyplot.title("Unique contributors" + year + ' with MapComplete (' + str(total) + ' contributors)')
    pyplot.ylabel("Number of unique contributors")
    pyplot.xlabel("Date")
    _show_or_save_figure("Contributors" + year + ".png", show)

    pyplot_init()
    pyplot.plot(dates, cumul_uniq, label='Cumulative unique contributors')
    pyplot.legend()
    pyplot.title("Cumulative unique contributors" + year + " with MapComplete - " + str(total) + " contributors")
    pyplot.ylabel("Number of unique contributors")
    pyplot.xlabel("Date")
    _show_or_save_figure("CumulativeContributors" + year + ".png", show)
def create_yearly_usercount_graphs(contents):
    """Create the contributor graphs over all data, then once per year from 2020 up to the current year."""
    create_usercount_graphs(contents)

    final_year = datetime.now().year
    year = 2020
    while year <= final_year:
        create_usercount_graphs(contents, str(year))
        year += 1
# Maps a lower-cased theme identifier, as found in the changeset metadata, to
# the name under which it is counted. Several spellings of one theme map to
# the same name so that they end up in the same slice of the breakdown.
theme_remappings = {
    "null": "buurtnatuur",
    "metamap": "maps",
    "wiki:mapcomplete/fritures": "fritures",
    "lits": "lit",
    "wiki:user:joost_schouppe/campersite": "campersite",
    "wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
    # Same wiki page as "wiki:user:joost_schouppe/campersite" above, so it
    # must map to the same name.
    "wiki-user-joost_schouppe-campersite": "campersite",
    "https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes",
}
def create_theme_breakdown(stats, year="", user=None, columnIndex=3):
    """Save a pie chart with the number of changes per theme.

    :param stats: the rows of stats.csv
    :param year: only rows whose date (column 0) starts with this string are
        counted; ``""`` counts every row
    :param user: when given, only rows whose cleaned column 1 equals this
        name are counted
    :param columnIndex: index of the column holding the theme
    :return: list of ``(theme, count)`` tuples sorted by descending count, or
        ``None`` when no row matched (no chart is written in that case)

    Without ``user``, themes with fewer than 5 changes are merged into a
    single "other" slice; with ``user`` every theme gets its own slice.
    The chart is saved as ``Theme distribution<user part><year part>.png`` in
    the working directory.
    """
    themeCounts = {}
    for row in stats:
        if not row[0].startswith(year):
            continue
        if user is not None and clean(row[1]) != user:
            continue
        theme = clean(row[columnIndex]).lower()
        theme = theme_remappings.get(theme, theme)
        themeCounts[theme] = themeCounts.get(theme, 0) + 1

    if not themeCounts:
        # Build the message piecewise: user may be None and year may be "".
        message = "No entries found"
        if user is not None:
            message += " for user " + user
        if year != "":
            message += " in " + year
        print(message)
        return

    themes = sorted(themeCounts.items(), key=lambda kv: kv[1], reverse=True)

    cutoff = 5 if user is None else 0
    other_count = sum(count for _, count in themes if count < cutoff)
    themes_filtered = [kv for kv in themes if kv[1] >= cutoff]
    keys = [name + " (" + str(count) + ")" for name, count in themes_filtered]
    values = [count for _, count in themes_filtered]
    total = sum(count for _, count in themes)
    first_pct = themes[0][1] / total
    if other_count > 0:
        keys.append("other")
        values.append(other_count)

    year_label = " in " + year if year != "" else ""
    user_label = "" if user is None else " by contributor " + user

    pyplot_init()
    # Rotate the chart so that the slice of the biggest theme is centred at
    # the top.
    pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct / 2))
    pyplot.title("MapComplete changes per theme" + year_label + user_label + " - " + str(total) + " total changes")
    pyplot.savefig("Theme distribution" + user_label + year_label + ".png", dpi=400, facecolor='w', edgecolor='w',
                   bbox_inches='tight')
    # Release the figure; one is created per call and several calls are made
    # per run.
    pyplot.close()
    return themes
def gen_theme_breakdown_graphs(contents, user=None):
    """Create the theme breakdown over all data, then once per year from 2020 up to the current year.

    ``user`` is handed through to ``create_theme_breakdown`` unchanged.
    """
    create_theme_breakdown(contents, "", user)

    final_year = datetime.now().year
    for year_label in map(str, range(2020, final_year + 1)):
        create_theme_breakdown(contents, year_label, user)
def main():
    """Read ``stats.csv`` from the working directory and generate the graphs."""
    # jq writes the file as UTF-8; be explicit so that contributor names
    # decode the same way regardless of the locale.
    with open('stats.csv', newline='', encoding='utf-8') as csvfile:
        stats = list(csv.reader(csvfile, delimiter=',', quotechar='"'))
    # NOTE(review): the contributor graphs are currently disabled and the
    # breakdown is limited to a single contributor - confirm this is intended.
    # create_yearly_usercount_graphs(stats)
    gen_theme_breakdown_graphs(stats, "joost schouppe")
    print("All done!")


# Only generate graphs when executed as a script, not when imported.
if __name__ == "__main__":
    main()

21
Docs/Tools/csvPerChange.sh Executable file
View file

@ -0,0 +1,21 @@
#! /bin/bash
# Flattens the downloaded stats.*.json pages into stats.csv, one line per
# changeset: date, username, language, theme, editor, creations, changes.

if [[ ! -e stats.1.json ]]
then
    echo "No stats found - not compiling"
    exit
fi

# -f: a missing stats.csv (first run) is not an error.
rm -f stats.csv
# echo "date, username, language, theme, editor, creations, changes" > stats.csv
echo "" > tmp.csv
for f in stats.*.json
do
    echo "$f"
    # jq prints every changeset as an array; joining all lines and splitting
    # again on "][" yields one changeset per line, after which the remaining
    # brackets are dropped.
    jq ".features[].properties | [.date, .user, .metadata.language, .metadata.theme, .editor, .create, .modify]" "$f" | tr -d "\n" | sed "s/]\[/\n/g" | tr -d "][" >> tmp.csv
    echo "" >> tmp.csv
done

# Drop empty lines, strip the leading indentation, cut the time of day off
# the timestamp and squeeze every run of spaces into a single space. Neither
# whitespace step depends on how many spaces jq used for indentation.
sed -e "/^$/d" -e "s/^ *//" -e "s/\"\(....-..-..\)T........./\"\1/" tmp.csv | tr -s " " | sort >> stats.csv
rm tmp.csv

18
Docs/Tools/fetchStats.sh Executable file
View file

@ -0,0 +1,18 @@
#! /bin/bash
# Downloads all MapComplete changesets from OSMCha, 1000 per page, into
# stats.<page>.json. An optional first argument gives the page to start at.

# Current time, URL-encoded ("%20" for the space, "%3A" for the colon).
DATE=$(date +"%Y-%m-%d%%20%H%%3A%M")
COUNTER=1
if [[ -n "$1" ]]
then
    echo "Starting at $1"
    COUNTER="$1"
fi

# NOTE(review): this API token is committed to the repository and should be
# rotated. Set OSMCHA_TOKEN to use another token without editing the script.
TOKEN="${OSMCHA_TOKEN:-6e422e2afedb79ef66573982012000281f03dc91}"

NEXT_URL="https://osmcha.org/api/v1/changesets/?date__gte=2020-07-05&date__lte=$DATE&editor=mapcomplete&page=$COUNTER&page_size=1000"

# -f: not having any earlier download is not an error.
rm -f stats.*.json
while [[ -n "$NEXT_URL" ]]
do
    echo "$COUNTER '$NEXT_URL'"
    curl "$NEXT_URL" \
        -H 'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0' \
        -H 'Accept: */*' \
        -H 'Accept-Language: en-US,en;q=0.5' \
        --compressed \
        -H 'Referer: https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D' \
        -H 'Content-Type: application/json' \
        -H "Authorization: Token $TOKEN" \
        -H 'DNT: 1' \
        -H 'Connection: keep-alive' \
        -H 'TE: Trailers' \
        -H 'Pragma: no-cache' \
        -H 'Cache-Control: no-cache' \
        -o "stats.$COUNTER.json"
    # The last page has "next": null. "// empty" turns that (and a missing
    # field) into an empty string, which ends the loop instead of requesting
    # the URL "null".
    NEXT_URL=$(jq -r '.next // empty' "stats.$COUNTER.json")
    COUNTER=$((COUNTER + 1))
done

2851
Docs/Tools/stats.csv Normal file

File diff suppressed because it is too large Load diff