Add some fancy graphs
This commit is contained in:
parent
f464600ab8
commit
c49585a70a
11 changed files with 3120 additions and 0 deletions
BIN
Docs/Tools/Contributors in 2020.png
Normal file
BIN
Docs/Tools/Contributors in 2020.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 199 KiB |
BIN
Docs/Tools/Contributors in 2021.png
Normal file
BIN
Docs/Tools/Contributors in 2021.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 196 KiB |
BIN
Docs/Tools/Contributors.png
Normal file
BIN
Docs/Tools/Contributors.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 196 KiB |
BIN
Docs/Tools/CumulativeContributors in 2020.png
Normal file
BIN
Docs/Tools/CumulativeContributors in 2020.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 264 KiB |
BIN
Docs/Tools/CumulativeContributors in 2021.png
Normal file
BIN
Docs/Tools/CumulativeContributors in 2021.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 258 KiB |
BIN
Docs/Tools/CumulativeContributors.png
Normal file
BIN
Docs/Tools/CumulativeContributors.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 258 KiB |
6
Docs/Tools/compileStats.sh
Executable file
6
Docs/Tools/compileStats.sh
Executable file
|
@ -0,0 +1,6 @@
|
|||
#! /bin/bash
# Regenerate the statistics: download the changeset pages, flatten them into
# stats.csv and render the graphs from that file.

# Stop at the first failing step so later steps never run on missing or
# partial input.
set -e

# The helper scripts are invoked with ./ paths, so run from the directory this
# script lives in regardless of the caller's working directory.
cd "$(dirname "$0")"

./fetchStats.sh
./csvPerChange.sh
python csvGrapher.py
|
||||
|
224
Docs/Tools/csvGrapher.py
Normal file
224
Docs/Tools/csvGrapher.py
Normal file
|
@ -0,0 +1,224 @@
|
|||
import csv
|
||||
from datetime import datetime
|
||||
|
||||
from matplotlib import pyplot
|
||||
|
||||
|
||||
def clean(s):
    """Return *s* with surrounding whitespace, then surrounding double quotes, removed."""
    trimmed = s.strip()
    return trimmed.strip('"')
|
||||
|
||||
|
||||
def counts(lst):
    """Count how often each value occurs in *lst*.

    Args:
        lst: Iterable of hashable values.

    Returns:
        A plain ``dict`` mapping every distinct value to its number of
        occurrences, in first-seen order.
    """
    # Local import: the file's import header does not provide Counter.
    from collections import Counter

    # Convert back to dict so a lookup of an unseen value still raises
    # KeyError, as it did with the hand-written loop.
    return dict(Counter(lst))
|
||||
|
||||
|
||||
class Hist:
    """Multi-map from a key (e.g. a date) to the list of values recorded for it.

    Keys keep their first-insertion order, so every per-key series produced by
    :meth:`map`, :meth:`mapcumul` and :meth:`csv` follows the order in which
    keys were first added.
    """

    def __init__(self, firstcolumn):
        """Create an empty histogram.

        Args:
            firstcolumn: Name of the key column. Wrapped in double quotes, it
                becomes the first header cell of :meth:`csv`.
        """
        self.key = '"' + firstcolumn + '"'
        self.dictionary = {}

    def add(self, key, value):
        """Record *value* under *key*; duplicates are kept."""
        self.dictionary.setdefault(key, []).append(value)

    def values(self):
        """Return every distinct value recorded under any key.

        The result is in first-seen order, which makes it reproducible between
        runs (a set-based order is not).
        """
        distinct = {}
        for recorded in self.dictionary.values():
            for value in recorded:
                distinct.setdefault(value, None)
        return list(distinct)

    def keys(self):
        """Return a view of the keys in first-insertion order."""
        return self.dictionary.keys()

    def get(self, key):
        """Return the list of values recorded under *key*, or None if absent."""
        return self.dictionary.get(key)

    def map(self, f):
        """Apply *f* to each key's value list.

        Returns:
            A list with one result per key, aligned with :meth:`keys`; pair it
            with ``keys()`` for use with e.g. ``pyplot.plot``.
        """
        return [f(recorded) for recorded in self.dictionary.values()]

    def mapcumul(self, f, add, zero):
        """Fold the per-key results of *f* into running totals.

        Args:
            f: Called with each key's value list.
            add: Combines the running total with the next result of *f*.
            zero: Initial running total.

        Returns:
            A list with the running total after each key, aligned with
            :meth:`keys`.
        """
        totals = []
        running_value = zero
        for recorded in self.dictionary.values():
            running_value = add(running_value, f(recorded))
            totals.append(running_value)
        return totals

    def csv(self):
        """Render the histogram as CSV text.

        The header row is the quoted key-column name followed by every
        distinct value; each following row is a key followed by how often each
        of those values was recorded under it. Every row, header included,
        ends with a newline.
        """
        # Local import: the file's import header does not provide Counter.
        from collections import Counter

        header = self.values()
        lines = [",".join([self.key] + [str(head) for head in header])]
        for key, recorded in self.dictionary.items():
            tally = Counter(recorded)  # absent values count as 0
            lines.append(",".join([str(key)] + [str(tally[head]) for head in header]))
        return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def build_hist(stats, keyIndex, valueIndex, condition=None):
    """Group one column of *stats* by another column.

    Args:
        stats: Iterable of rows (sequences of strings).
        keyIndex: Index of the column used as histogram key.
        valueIndex: Index of the column whose values are collected per key.
        condition: Optional predicate called with the raw (not yet cleaned)
            row; rows for which it returns a falsy value are skipped. With
            None every row is used.

    Returns:
        A ``Hist`` whose key column is named "date", filled with the cleaned
        key/value cells of every accepted row.
    """
    hist = Hist("date")
    for row in stats:
        # The predicate is optional, so it must never be called unguarded.
        if condition is not None and not condition(row):
            continue
        cells = [clean(cell) for cell in row]
        hist.add(cells[keyIndex], cells[valueIndex])
    return hist
|
||||
|
||||
|
||||
def cumulative_users(stats, year=""):
    """Compute per-day contributor series for the rows whose date starts with *year*.

    Args:
        stats: Rows with the date (``YYYY-MM-DD``) in column 0 and the user
            name in column 1.
        year: Date prefix to filter on; the empty string selects every row.

    Returns:
        A tuple of four equally long lists, one entry per distinct date in
        order of first appearance:
        ``(dates, cumul_uniq, unique_per_day, new_users)`` where ``dates`` are
        ``datetime`` objects, ``cumul_uniq`` is the number of distinct users
        seen up to and including that date, ``unique_per_day`` the number of
        distinct users on that date and ``new_users`` the number of users
        seen for the first time on that date. All four are empty when no row
        matches.
    """
    users_hist = build_hist(stats, 0, 1, lambda row: row[0].startswith(year))

    seen = set()
    dates = []
    cumul_uniq = []
    unique_per_day = []
    new_users = []
    for day in users_hist.keys():
        todays_users = set(users_hist.get(day))
        already_known = len(seen)
        seen |= todays_users
        dates.append(datetime.strptime(day, "%Y-%m-%d"))
        cumul_uniq.append(len(seen))
        unique_per_day.append(len(todays_users))
        # Everybody active on the very first day is a newcomer too, so the
        # first entry is the first day's head count rather than a fixed 0.
        new_users.append(len(seen) - already_known)
    return dates, cumul_uniq, unique_per_day, new_users
|
||||
|
||||
|
||||
def pyplot_init():
    """Open a new 14x8 figure at 200 dpi with vertical x tick labels and a tight layout."""
    figure_options = {"figsize": (14, 8), "dpi": 200}
    pyplot.figure(**figure_options)
    pyplot.xticks(rotation='vertical')
    pyplot.tight_layout()
|
||||
|
||||
|
||||
def _show_or_save(show, filename):
    """Display the current figure, or write it to *filename*, then release it."""
    if show:
        pyplot.show()
    else:
        pyplot.savefig(filename, dpi=400, facecolor='w', edgecolor='w', bbox_inches='tight')
    # Figures stay registered with pyplot until closed; close each one so a
    # multi-year run does not pile them up in memory.
    pyplot.close()


def create_usercount_graphs(stats, year="", show=False):
    """Draw the per-day and the cumulative contributor graphs.

    Args:
        stats: Rows with the date in column 0 and the user name in column 1.
        year: Date prefix to restrict the graphs to; the empty string covers
            every row.
        show: When true the figures are displayed; otherwise they are saved
            as ``Contributors<suffix>.png`` and
            ``CumulativeContributors<suffix>.png`` where the suffix is
            ``" in <year>"`` or empty.

    Nothing is drawn when no row matches *year*.
    """
    dates, cumul_uniq, unique_per_day, new_users = cumulative_users(stats, year)

    if year != "":
        year = " in " + year

    # A year without any changes yields empty series; there is no total to
    # report and nothing to plot.
    if not cumul_uniq:
        print("No contributors found" + year + " - skipping contributor graphs")
        return
    total = cumul_uniq[-1]

    pyplot_init()
    pyplot.bar(dates, unique_per_day, label='Unique contributors')
    pyplot.bar(dates, new_users, label='First time contributor via MapComplete')
    pyplot.legend()
    pyplot.title("Unique contributors" + year + ' with MapComplete (' + str(total) + ' contributors)')
    pyplot.ylabel("Number of unique contributors")
    pyplot.xlabel("Date")
    _show_or_save(show, "Contributors" + year + ".png")

    pyplot_init()
    pyplot.plot(dates, cumul_uniq, label='Cumulative unique contributors')
    pyplot.legend()
    pyplot.title("Cumulative unique contributors" + year + " with MapComplete - " + str(total) + " contributors")
    pyplot.ylabel("Number of unique contributors")
    pyplot.xlabel("Date")
    _show_or_save(show, "CumulativeContributors" + year + ".png")
|
||||
|
||||
|
||||
def create_yearly_usercount_graphs(contents):
    """Draw the contributor graphs for all rows, then for each year from 2020 through the current one."""
    create_usercount_graphs(contents)
    last_year = datetime.now().year
    for label in map(str, range(2020, last_year + 1)):
        create_usercount_graphs(contents, label)
|
||||
|
||||
|
||||
# Maps lower-cased theme identifiers as found in the statistics onto the
# canonical theme name they are counted under. Aliases of the same theme must
# map to the same name, otherwise the theme is split over several pie slices.
theme_remappings = {
    # NOTE(review): presumably changesets without theme metadata show up as
    # the literal text "null" - confirm against stats.csv.
    "null": "buurtnatuur",
    "metamap": "maps",
    "wiki:mapcomplete/fritures": "fritures",
    "lits": "lit",
    "wiki:user:joost_schouppe/campersite": "campersite",
    "wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
    "wiki-user-joost_schouppe-campersite": "campersite",
    "https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes",
}
|
||||
|
||||
|
||||
def _count_themes(stats, year, user, columnIndex):
    """Tally the canonical theme name of every row matching *year* and *user*."""
    # Local import: the file's import header does not provide Counter.
    from collections import Counter

    tally = Counter()
    for row in stats:
        if not row[0].startswith(year):
            continue
        if user is not None and clean(row[1]) != user:
            continue
        theme = clean(row[columnIndex]).lower()
        tally[theme_remappings.get(theme, theme)] += 1
    return tally


def create_theme_breakdown(stats, year="", user=None, columnIndex=3):
    """Save a pie chart of the number of changes per theme.

    Args:
        stats: Rows with the date in column 0, the user name in column 1 and
            the theme in column *columnIndex*.
        year: Date prefix to restrict the chart to; the empty string covers
            every row.
        user: When given, only rows of this (cleaned) user name are counted
            and every theme gets its own slice. Without a user, themes with
            fewer than 5 changes are merged into one "other" slice.
        columnIndex: Index of the theme column.

    Returns:
        The list of ``(theme, count)`` pairs sorted by descending count, or
        None when no row matched (in which case no chart is written).
    """
    themes = _count_themes(stats, year, user, columnIndex).most_common()
    if not themes:
        # user is optional, so it cannot be concatenated unconditionally.
        whom = "" if user is None else " for user " + user
        when = "" if year == "" else " in " + year
        print("No entries found" + whom + when)
        return None

    cutoff = 5 if user is None else 0
    themes_filtered = [(theme, count) for theme, count in themes if count >= cutoff]
    other_count = sum(count for _, count in themes if count < cutoff)
    keys = [theme + " (" + str(count) + ")" for theme, count in themes_filtered]
    values = [count for _, count in themes_filtered]
    total = sum(count for _, count in themes)
    first_pct = themes[0][1] / total

    if year != "":
        year = " in " + year

    if other_count > 0:
        keys.append("other")
        values.append(other_count)

    pyplot_init()
    # Rotate the pie so the largest slice is centred on the 90 degree mark.
    pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct / 2))
    if user is None:
        user = ""
    else:
        user = " by contributor " + user
    pyplot.title("MapComplete changes per theme" + year + user + " - " + str(total) + " total changes")
    pyplot.savefig("Theme distribution" + user + year + ".png", dpi=400, facecolor='w', edgecolor='w',
                   bbox_inches='tight')
    # Release the figure; otherwise one stays open per generated chart.
    pyplot.close()
    return themes
|
||||
|
||||
def gen_theme_breakdown_graphs(contents, user=None):
    """Create the theme breakdown for all rows, then for each year from 2020 through the current one."""
    create_theme_breakdown(contents, "", user)
    last_year = datetime.now().year
    for label in map(str, range(2020, last_year + 1)):
        create_theme_breakdown(contents, label, user)
|
||||
|
||||
def main():
    """Read ``stats.csv`` from the working directory and generate the graphs."""
    # Explicit encoding so user names decode the same way on every platform.
    # NOTE(review): assumes stats.csv is UTF-8 - confirm for your data.
    with open('stats.csv', newline='', encoding='utf-8') as csvfile:
        stats = list(csv.reader(csvfile, delimiter=',', quotechar='"'))
    # NOTE(review): the contributor graphs are currently switched off;
    # re-enable the next line to regenerate them.
    # create_yearly_usercount_graphs(stats)
    gen_theme_breakdown_graphs(stats, "joost schouppe")
    print("All done!")


# Only generate graphs when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
21
Docs/Tools/csvPerChange.sh
Executable file
21
Docs/Tools/csvPerChange.sh
Executable file
|
@ -0,0 +1,21 @@
|
|||
#! /bin/bash
# Flatten the downloaded changeset pages (stats.<page>.json) into stats.csv,
# one sorted line per changeset.

if [[ ! -e stats.1.json ]]
then
    echo "No stats found - not compiling"
    exit
fi

# -f: a missing stats.csv (first run) is not an error.
rm -f stats.csv
# echo "date, username, language, theme, editor, creations, changes" > stats.csv
echo "" > tmp.csv

for f in stats.*.json
do
    echo "$f"
    # jq prints one multi-line array per changeset: join everything onto one
    # line, split again between consecutive arrays, then drop the brackets.
    jq ".features[].properties | [.date, .user, .metadata.language, .metadata.theme, .editor, .create, .modify]" "$f" | tr -d "\n" | sed "s/]\[/\n/g" | tr -d "][" >> tmp.csv
    echo "" >> tmp.csv
done

# Drop empty lines, strip all leading spaces, collapse runs of spaces to a
# single one and cut the time of day off the timestamp. The space patterns
# use explicit quantifiers so they do not depend on an exact number of
# literal spaces inside the expression.
sed "/^$/d" tmp.csv | sed "s/^ *//" | sed "s/ \{2,\}/ /g" | sed "s/\"\(....-..-..\)T........./\"\1/" | sort >> stats.csv
rm tmp.csv
|
18
Docs/Tools/fetchStats.sh
Executable file
18
Docs/Tools/fetchStats.sh
Executable file
|
@ -0,0 +1,18 @@
|
|||
#! /bin/bash
# Download every MapComplete changeset known to OSMCha, one result page per
# file (stats.<page>.json).
# Usage: ./fetchStats.sh [first_page]

# Current time, URL-encoded ("%20" = space, "%3A" = colon).
DATE=$(date +"%Y-%m-%d%%20%H%%3A%M")
COUNTER=1
if [[ $1 != "" ]]
then
    echo "Starting at $1"
    COUNTER="$1"
fi

# NOTE(security): an API token is committed in this script. Prefer supplying
# one through the OSMCHA_TOKEN environment variable; the committed value is
# only kept as a fallback so existing invocations keep working, and should be
# revoked and removed.
TOKEN="${OSMCHA_TOKEN:-6e422e2afedb79ef66573982012000281f03dc91}"

NEXT_URL="https://osmcha.org/api/v1/changesets/?date__gte=2020-07-05&date__lte=$DATE&editor=mapcomplete&page=$COUNTER&page_size=1000"

# -f: having no previous downloads is not an error.
rm -f stats.*.json
while [[ "$NEXT_URL" != "" ]]
do
    echo "$COUNTER '$NEXT_URL'"
    # --fail turns HTTP error responses into a non-zero exit status instead of
    # saving the error body as if it were a result page.
    if ! curl "$NEXT_URL" --fail --compressed \
        -H 'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0' \
        -H 'Accept: */*' \
        -H 'Accept-Language: en-US,en;q=0.5' \
        -H 'Referer: https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D' \
        -H 'Content-Type: application/json' \
        -H "Authorization: Token $TOKEN" \
        -H 'DNT: 1' \
        -H 'Connection: keep-alive' \
        -H 'TE: Trailers' \
        -H 'Pragma: no-cache' \
        -H 'Cache-Control: no-cache' \
        -o "stats.$COUNTER.json"
    then
        echo "Download of page $COUNTER failed" >&2
        exit 1
    fi

    # The last page has "next": null; "// empty" turns that into an empty
    # string so the loop ends without requesting a URL called "null".
    NEXT_URL=$(jq -r '.next // empty' "stats.$COUNTER.json")
    COUNTER=$((COUNTER + 1))
done
|
2851
Docs/Tools/stats.csv
Normal file
2851
Docs/Tools/stats.csv
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue