Alter 5 files

Add .gitignore
Add Screenshot 2025-05-08 at 21-15-26 Popepedia abi abi.png
Add collect.py
Add index.html
Add tabulate.py
This commit is contained in:
akp 2025-05-10 17:21:08 +01:00
commit 01a5cc6df3
5 changed files with 271 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
token.txt

Binary file not shown.

After

Width:  |  Height:  |  Size: 236 KiB

67
collect.py Normal file
View file

@ -0,0 +1,67 @@
import requests
import time
import json
import os
# Revision to resume from when no usable local history exists yet.
# 1289435017 is the last pre-pope edit
# 1289422020 is a couple edits prior
DEFAULT_START_REVISION = "1289422020"
PAGE = 'Pope_Leo_XIV'
HISTORY_FILE = 'pope.jsonl'
TOKEN_FILE = 'token.txt'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever
PAUSE_BETWEEN_REQUESTS = 0.5  # seconds slept between successive page requests


def read_last_revision_id(path, default):
    """Return the ``id`` of the last JSON record in the JSONL file at *path*.

    Only the tail of the file is read, so the cost does not grow with the
    size of the history. If the file is missing, empty, or its last line is
    not a JSON object with an ``id`` key, *default* is returned instead.
    """
    try:
        with open(path, 'rb') as f:
            try:
                # Walk backwards from the end until the newline that precedes
                # the last line. Seeking before the start of the file raises
                # OSError, which means the file has at most one line.
                f.seek(-2, os.SEEK_END)
                while f.read(1) != b'\n':
                    f.seek(-2, os.SEEK_CUR)
            except OSError:
                f.seek(0)
            last_line = f.readline().decode()
        record = json.loads(last_line)
        print(record)
        return record["id"]
    except (OSError, ValueError, KeyError, TypeError) as e:
        # ValueError covers both JSON and UTF-8 decoding failures.
        print("no usable local history, starting from", default, "-", repr(e))
        return default


def fetch_new_revisions(url, headers):
    """Follow the ``newer`` links starting at *url* and return all revisions.

    Each response's ``revisions`` list is reversed before being appended
    (presumably the API lists newest first within a response - confirm
    against the API docs). On a network or HTTP error the loop stops and
    whatever was collected so far is returned, so a partial run is still
    saved and the next run resumes from there.
    """
    collected = []
    while True:
        print(url)
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print("request failed, stopping early:", e)
            break
        revisions = data.get("revisions", [])
        collected.extend(reversed(revisions))
        print("rl remains", response.headers.get("x-ratelimit-remaining"), "rl resets", response.headers.get("x-ratelimit-reset"))
        if "newer" not in data:
            break
        url = data["newer"]
        if revisions:  # a response may carry a "newer" link but no revisions
            print("current", revisions[0]["timestamp"])
        print(f"n={len(collected)}")
        time.sleep(PAUSE_BETWEEN_REQUESTS)
    return collected


def main():
    """Append every revision newer than the last stored one to the history file."""
    with open(TOKEN_FILE) as f:
        wikipedia_token = f.read().strip()
    headers = {
        'Authorization': 'Bearer ' + wikipedia_token,
    }
    last_revision = read_last_revision_id(HISTORY_FILE, DEFAULT_START_REVISION)
    url = f'https://api.wikimedia.org/core/v1/wikipedia/en/page/{PAGE}/history?newer_than={last_revision}'
    new = fetch_new_revisions(url, headers)
    print("FINAL", len(new))
    with open(HISTORY_FILE, "a") as f:
        f.writelines(json.dumps(item) + "\n" for item in new)


if __name__ == "__main__":
    main()

140
index.html Normal file
View file

@ -0,0 +1,140 @@
---
title: "Popepedia"
description: "Information about the edit frequency of the new Pope's Wikipedia page"
hideAside: true
---
{% extends "_layouts/base.html" %}
{% block head %}
<script src="https://cdn.jsdelivr.net/npm/echarts@5.6.0/dist/echarts.min.js" integrity="sha256-v0oiNSTkC3fDBL7GfhIiz1UfFIgM9Cxp3ARlWOEcB7E=" crossorigin="anonymous"></script>
{% endblock %}
{% block main %}
<h1>Popepedia</h1>
<p>Here are some stats about the <a href="https://en.wikipedia.org/wiki/Pope_Leo_XIV">new Pope's Wikipedia page</a> and how often it's being edited. The 2 edits prior to the announcement are included for a little context. Data last updated at <span id="updatedAt"></span>.</p>
<p>Since announcement:</p>
<ul>
<li>Total edits: <span id="totalEdits"></span></li>
<li>Total volume: <span id="totalVolume"></span></li>
</ul>
<center><div id="editchart" style="width: 80%;height: 500px;"></div></center>
<center><div id="volumechart" style="width: 80%;height: 500px;"></div></center>
<script type="text/javascript">
// Reload the page every minute so the charts pick up refreshed data.
// `reload` has to be called as a method of `location`: handing the bare
// function reference to setTimeout detaches it from its receiver and the
// browser throws "Illegal invocation" instead of reloading.
setTimeout(() => window.location.reload(), 1000 * 60)

// Column order of each row in the `data` array of data.json; shared by both charts.
const DIMENSIONS = ['timestamp', "edits", "avg_edits", "running_edits", 'vol', 'avg_vol', "running_vol"]

window
    .fetch(new Request("data.json"))
    .then((response) => {
        if (!response.ok) {
            throw new Error(`HTTP error! Status: ${response.status}`);
        }
        return response.json();
    })
    .then((response) => {
        var data = response.data

        // Fill in the summary figures above the charts.
        document.getElementById("updatedAt").innerText = response.updated_at
        document.getElementById("totalEdits").innerText = response.total_edits
        document.getElementById("totalVolume").innerText = response.total_bytes_changed + " characters"

        // Edit-count chart.
        var editChart = echarts.init(document.getElementById('editchart'));
        editChart.setOption({
            title: {
                text: 'Edits over time'
            },
            tooltip: {
                trigger: 'axis',
            },
            dataset: {
                source: data,
                dimensions: DIMENSIONS,
            },
            xAxis: { type: 'time' },
            yAxis: {name: 'Edit count', nameLocation: 'center', nameGap: 45},
            series: [
                // Raw per-minute series, currently disabled:
                // {
                //     name: 'Raw edit count',
                //     yAxisIndex: 0,
                //     type: 'line',
                //     encode: {
                //         x: 'timestamp',
                //         y: 'edits'
                //     }
                // },
                {
                    name: 'Average edit count',
                    yAxisIndex: 0,
                    type: 'line',
                    encode: {
                        x: 'timestamp',
                        y: "avg_edits"
                    }
                },
                {
                    name: 'Running edit count (hundreds)',
                    yAxisIndex: 0,
                    type: 'line',
                    encode: {
                        x: 'timestamp',
                        y: "running_edits"
                    }
                }
            ]
        });

        // Edit-volume chart.
        var volumeChart = echarts.init(document.getElementById('volumechart'));
        volumeChart.setOption({
            title: {
                text: 'Volume over time'
            },
            tooltip: {
                trigger: 'axis',
            },
            dataset: {
                source: data,
                dimensions: DIMENSIONS,
            },
            xAxis: { type: 'time' },
            yAxis: {name: 'Volume (characters)', nameLocation: 'center', nameGap: 60},
            series: [
                // Raw per-minute series, currently disabled:
                // {
                //     name: 'Raw edit volume',
                //     yAxisIndex: 1,
                //     type: 'line',
                //     encode: {
                //         x: 'timestamp',
                //         y: 'vol'
                //     }
                // },
                {
                    name: 'Average edit volume (characters)',
                    type: 'line',
                    encode: {
                        x: 'timestamp',
                        y: 'avg_vol'
                    }
                },
                {
                    name: 'Running edit volume (hundreds of characters)',
                    type: 'line',
                    encode: {
                        x: 'timestamp',
                        y: "running_vol"
                    }
                },
            ]
        });
    })
    .catch((error) => {
        // Surface load/parse failures explicitly instead of leaving an
        // unhandled promise rejection.
        console.error("Failed to load chart data:", error);
    });
</script>
{% endblock %}

63
tabulate.py Normal file
View file

@ -0,0 +1,63 @@
"""Aggregate the revisions in ``pope.jsonl`` into per-minute chart data.

Run as a script, this prints one JSON document to stdout containing overall
totals, a generation timestamp and one row per minute that saw an edit.
"""
import datetime
import json
from collections import defaultdict

# How many preceding minute buckets are averaged into avg_edits / avg_bytes.
AVERAGE_WINDOW = 5


def minute_key(timestamp):
    """Zero the seconds field of a timestamp such as ``2025-05-08T18:17:09Z``."""
    return timestamp[:17] + "00" + timestamp[19:]


def summarise_minutes(lines):
    """Group JSONL revision records by minute and total each bucket.

    *lines* is any iterable of strings, one JSON revision per line; blank
    lines are skipped. Returns a list of ``{"time", "edits", "bytes"}`` dicts
    in order of first appearance, where ``bytes`` is the sum of the absolute
    ``delta`` values. A missing or ``null`` delta counts as 0.
    """
    points = defaultdict(list)
    for line in lines:
        line = line.strip()
        if not line:
            continue
        revision = json.loads(line)
        points[minute_key(revision["timestamp"])].append(revision)
    return [
        {
            "time": minute,
            "edits": len(revisions),
            # `delta` can be null in the input, and abs(None) would raise.
            "bytes": sum(abs(r.get("delta") or 0) for r in revisions),
        }
        for minute, revisions in points.items()
    ]


def add_running_stats(totals, window=AVERAGE_WINDOW):
    """Add running totals and moving averages to each bucket in place.

    The average covers up to *window* buckets immediately before the current
    one (the current bucket is not included); the very first bucket, which
    has no predecessors, uses its own values. Buckets exist only for minutes
    that had edits, so the window counts buckets rather than clock minutes.
    Returns *totals* for convenience.
    """
    running_edits = 0
    running_bytes = 0
    for i, bucket in enumerate(totals):
        running_edits += bucket["edits"]
        running_bytes += bucket["bytes"]
        bucket["running_edits"] = running_edits
        bucket["running_bytes"] = running_bytes
        previous = totals[max(0, i - window):i]
        if previous:
            bucket["avg_edits"] = sum(p["edits"] for p in previous) / len(previous)
            bucket["avg_bytes"] = sum(p["bytes"] for p in previous) / len(previous)
        else:
            bucket["avg_edits"] = bucket["edits"]
            bucket["avg_bytes"] = bucket["bytes"]
    return totals


def build_report(lines, now=None):
    """Build the JSON-serialisable report for the revisions in *lines*.

    *now* is the datetime written to ``updated_at``; it defaults to the
    current UTC time. Each ``data`` row is
    ``[time, edits, avg_edits, running_edits / 100, bytes, avg_bytes,
    running_bytes / 100]`` - the running totals are scaled to hundreds.
    """
    totals = add_running_stats(summarise_minutes(lines))
    if now is None:
        now = datetime.datetime.now(datetime.timezone.utc)
    rows = [
        [
            t["time"],
            t["edits"],
            t["avg_edits"],
            t["running_edits"] / 100,
            t["bytes"],
            t["avg_bytes"],
            t["running_bytes"] / 100,
        ]
        for t in totals
    ]
    return {
        "total_edits": sum(t["edits"] for t in totals),
        "total_bytes_changed": sum(t["bytes"] for t in totals),
        "updated_at": now.strftime("%Y-%m-%d %H:%M"),
        "data": rows,
    }


def main():
    """Read ``pope.jsonl`` and print the report as JSON."""
    with open("pope.jsonl") as f:
        report = build_report(f)
    print(json.dumps(report))


if __name__ == "__main__":
    main()