Alter 5 files
Add .gitignore Add Screenshot 2025-05-08 at 21-15-26 Popepedia abi abi.png Add collect.py Add index.html Add tabulate.py
This commit is contained in:
commit
01a5cc6df3
5 changed files with 271 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
token.txt
|
BIN
Screenshot 2025-05-08 at 21-15-26 Popepedia abi abi.png
Normal file
BIN
Screenshot 2025-05-08 at 21-15-26 Popepedia abi abi.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 236 KiB |
67
collect.py
Normal file
67
collect.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
import requests
|
||||
import time
|
||||
import json
|
||||
import os
|
||||
|
||||
# Load the Wikimedia API bearer token from token.txt. The context manager
# closes the file handle as soon as the token has been read instead of
# leaving it open until interpreter exit.
with open("token.txt") as token_file:
    wikipedia_token = token_file.read().strip()

# Authorization header passed to requests.get() for every history request.
headers = {
    'Authorization': 'Bearer ' + wikipedia_token,
}
|
||||
|
||||
def read_last_revision(path, default):
    """Return the ``id`` of the last JSON record stored in *path*.

    Only the final line of the file is read, by seeking backwards from the
    end until the preceding newline is found.

    Returns *default* when the file does not exist or is empty (a first
    run). When the file exists but its last line cannot be used (unreadable
    file, invalid JSON, no ``id`` key) *default* is also returned, but the
    reason is printed so the fallback is never silent.
    """
    try:
        with open(path, 'rb') as f:
            try:  # catch OSError in case of a one line file
                f.seek(-2, os.SEEK_END)
                while f.read(1) != b'\n':
                    f.seek(-2, os.SEEK_CUR)
            except OSError:
                f.seek(0)
            last_line = f.readline()
    except FileNotFoundError:
        # Nothing collected yet.
        return default
    except OSError as e:
        print("could not read", path, "- falling back to", default, "-", repr(e))
        return default

    if not last_line.strip():
        # Empty file: nothing collected yet.
        return default

    try:
        # UnicodeDecodeError and json.JSONDecodeError are both ValueErrors.
        ll = json.loads(last_line.decode())
        revision = ll["id"]
    except (ValueError, KeyError, TypeError) as e:
        print("unusable last line in", path, "- falling back to", default, "-", repr(e))
        return default

    print(ll)
    return revision


# Resume after the newest revision already stored in pope.jsonl; on a first
# run start from the baseline revision below.
# 1289435017 is the last pre-pope edit
# 1289422020 is a couple edits prior
last_revision = read_last_revision('pope.jsonl', "1289422020")
|
||||
|
||||
# Fetch every revision newer than `last_revision`, following the API's
# "newer" links one response at a time.
page = 'Pope_Leo_XIV'
# The title is taken from `page` so it only has to be changed in one place.
url = (
    'https://api.wikimedia.org/core/v1/wikipedia/en/page/'
    + page
    + '/history?newer_than='
    + str(last_revision)
)

n = 0  # number of revisions fetched during this run

new = []  # fetched revisions; each response is reversed before being appended

while True:
    print(url)

    # A timeout keeps an unattended run from hanging forever on a stalled
    # connection.
    response = requests.get(url, headers=headers, timeout=30)

    if not response.ok:
        # An error response (rate limit, bad token, ...) carries no
        # revisions. Report it instead of mistaking it for the end of the
        # history, and stop so that what was fetched so far is still saved.
        print("request failed", response.status_code, response.text[:200])
        break

    data = response.json()
    revisions = data.get("revisions", [])

    new += list(reversed(revisions))
    n += len(revisions)

    print("rl remains", response.headers.get("x-ratelimit-remaining"), "rl resets", response.headers.get("x-ratelimit-reset"))

    # No "newer" link in the response: nothing further to fetch.
    if "newer" not in data:
        break

    url = data["newer"]

    if revisions:
        print("current", revisions[0]["timestamp"])
    print(f"{n=}")

    # Pause between requests.
    time.sleep(0.5)

print("FINAL", n)
|
||||
|
||||
# Append the newly fetched revisions to the log, one JSON document per line.
with open("pope.jsonl", "a") as out:
    out.writelines(json.dumps(revision) + "\n" for revision in new)
|
140
index.html
Normal file
140
index.html
Normal file
|
@ -0,0 +1,140 @@
|
|||
---
|
||||
title: "Popepedia"
|
||||
description: "Information about the edit frequency of the new Pope's Wikipedia page"
|
||||
hideAside: true
|
||||
---
|
||||
{% extends "_layouts/base.html" %}
|
||||
{% block head %}
|
||||
<script src="https://cdn.jsdelivr.net/npm/echarts@5.6.0/dist/echarts.min.js" integrity="sha256-v0oiNSTkC3fDBL7GfhIiz1UfFIgM9Cxp3ARlWOEcB7E=" crossorigin="anonymous"></script>
|
||||
{% endblock %}
|
||||
|
||||
{% block main %}
|
||||
<h1>Popepedia</h1>
|
||||
<p>Here are some stats about the <a href="https://en.wikipedia.org/wiki/Pope_Leo_XIV">new Pope's Wikipedia page</a> and how often it's being edited. The 2 edits prior to the announcement are included for a little context. Data last updated at <span id="updatedAt"></span>.</p>
|
||||
<p>Since announcement:</p>
|
||||
<ul>
|
||||
<li>Total edits: <span id="totalEdits"></span></li>
|
||||
<li>Total volume: <span id="totalVolume"></span></li>
|
||||
</ul>
|
||||
<center><div id="editchart" style="width: 80%;height: 500px;"></div></center>
|
||||
<center><div id="volumechart" style="width: 80%;height: 500px;"></div></center>
|
||||
|
||||
<script type="text/javascript">
|
||||
// Reload the page once a minute so the charts pick up fresh data.
// `reload` must be invoked as a method of `window.location`: handing the
// bare function reference to setTimeout detaches it from its Location
// object, and the call then throws instead of reloading.
setTimeout(() => window.location.reload(), 1000 * 60)

window
  .fetch(new Request("data.json"))
  .then((response) => {
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }

    return response.json();
  })
  .then((response) => {
    const data = response.data

    // Headline figures above the charts.
    document.getElementById("updatedAt").innerText = response.updated_at
    document.getElementById("totalEdits").innerText = response.total_edits
    document.getElementById("totalVolume").innerText = response.total_bytes_changed + " characters"

    // Column names for each row of `data`, shared by both charts. The raw
    // per-minute columns ('edits', 'vol') are available but not plotted.
    const dimensions = ['timestamp', "edits", "avg_edits", "running_edits", 'vol', 'avg_vol', "running_vol"]

    // Build a line series plotting dataset column `column` over time.
    const lineSeries = (name, column) => ({
      name: name,
      type: 'line',
      encode: {
        x: 'timestamp',
        y: column
      }
    })

    // Chart 1: edit counts.
    const editChart = echarts.init(document.getElementById('editchart'));
    editChart.setOption({
      title: {
        text: 'Edits over time'
      },
      tooltip: {
        trigger: 'axis',
      },
      dataset: {
        source: data,
        dimensions: dimensions,
      },
      xAxis: { type: 'time' },
      yAxis: {name: 'Edit count', nameLocation: 'center', nameGap: 45},
      series: [
        lineSeries('Average edit count', "avg_edits"),
        lineSeries('Running edit count (hundreds)', "running_edits"),
      ]
    });

    // Chart 2: edit volume.
    const volumeChart = echarts.init(document.getElementById('volumechart'));
    volumeChart.setOption({
      title: {
        text: 'Volume over time'
      },
      tooltip: {
        trigger: 'axis',
      },
      dataset: {
        source: data,
        dimensions: dimensions,
      },
      xAxis: { type: 'time' },
      yAxis: {name: 'Volume (characters)', nameLocation: 'center', nameGap: 60},
      series: [
        lineSeries('Average edit volume (characters)', 'avg_vol'),
        lineSeries('Running edit volume (hundreds of characters)', "running_vol"),
      ]
    });
  })
  .catch((error) => {
    // Make a failed or malformed data.json visible in the console.
    console.error("Could not load data.json", error)
  });
|
||||
</script>
|
||||
{% endblock %}
|
63
tabulate.py
Normal file
63
tabulate.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
"""Aggregate pope.jsonl into per-minute statistics and print them as JSON."""

import datetime
import json
from collections import defaultdict


def minute_key(timestamp):
    """Return *timestamp* with its seconds field zeroed.

    Example: ``2025-05-08T18:17:09Z`` becomes ``2025-05-08T18:17:00Z``.
    """
    return timestamp[:17] + "00" + timestamp[19:]


def load_points(lines):
    """Group revision records by the minute in which they were made.

    *lines* is an iterable of JSON documents, one per line. Blank lines are
    skipped. The result maps a minute key to the list of revisions made in
    that minute, in the order the minutes first appear in the input.
    """
    points = defaultdict(list)
    for line in lines:
        line = line.strip()
        if not line:
            continue
        data = json.loads(line)
        points[minute_key(data["timestamp"])].append(data)
    return points


def summarise(points):
    """Return one totals dict per minute, in the iteration order of *points*.

    Each dict holds the minute's edit count and absolute bytes changed, the
    running totals up to and including that minute, and the average of both
    figures over the up-to-five entries before it (the minute itself is not
    part of its own average; the first entry uses its own values). Only
    minutes that contain at least one edit have an entry.
    """
    totals = []
    running_edits = 0
    running_bytes = 0

    for minute, revisions in points.items():
        edits = len(revisions)
        # A revision's delta may be null (or absent); count it as 0 bytes
        # instead of letting abs(None) raise.
        changed = sum(abs(revision.get("delta") or 0) for revision in revisions)

        running_edits += edits
        running_bytes += changed

        window = totals[-5:]
        if window:
            avg_edits = sum(entry["edits"] for entry in window) / len(window)
            avg_bytes = sum(entry["bytes"] for entry in window) / len(window)
        else:
            avg_edits = edits
            avg_bytes = changed

        totals.append({
            "time": minute,
            "edits": edits,
            "bytes": changed,
            "running_edits": running_edits,
            "running_bytes": running_bytes,
            "avg_bytes": avg_bytes,
            "avg_edits": avg_edits,
        })

    return totals


def build_report(totals, updated_at):
    """Return the JSON-serialisable report for *totals*.

    Each row of ``data`` is ``[time, edits, avg_edits, running_edits / 100,
    bytes, avg_bytes, running_bytes / 100]``; the running totals are scaled
    to hundreds.
    """
    rows = [
        [
            entry["time"],
            entry["edits"],
            entry["avg_edits"],
            entry["running_edits"] / 100,
            entry["bytes"],
            entry["avg_bytes"],
            entry["running_bytes"] / 100,
        ]
        for entry in totals
    ]
    return {
        "total_edits": sum(entry["edits"] for entry in totals),
        "total_bytes_changed": sum(entry["bytes"] for entry in totals),
        "updated_at": updated_at,
        "data": rows,
    }


def main():
    """Read pope.jsonl and print the report to stdout as a single JSON object."""
    with open("pope.jsonl") as f:
        points = load_points(f)

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted string is the same.
    now = datetime.datetime.now(datetime.timezone.utc)
    report = build_report(summarise(points), now.strftime("%Y-%m-%d %H:%M"))
    print(json.dumps(report))


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue