User:Hkm/Rankings program
< User:Hkm
import re
import requests
from bs4 import BeautifulSoup
def process_talk(url):
    """Collect per-work ratings from a wiki talk page and average them.

    Fetches the edit view at ``url``, reads the wikitext out of the
    ``wpTextbox1`` textarea, and scans it line by line for entries shaped
    like ``<creator>: <work>. <rating>``.  Lines that do not match, have an
    empty or over-long (more than 40 characters) creator or work, or carry
    a rating outside ``0..5`` are ignored.

    Args:
        url: Address of the page's ``action=edit`` view.

    Returns:
        A dict mapping ``(creator, work)`` to ``(average, count)``.
        ``count`` is the number of accepted rating lines for that pair;
        ``average`` is the mean of those ratings together with the two
        seed values 0 and 5.  An empty dict is returned when the request
        fails or the textarea cannot be found.
    """
    # creator ":" work "." rating -- the work may not contain a period.
    pattern = re.compile(r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$')

    try:
        # A timeout keeps the script from hanging forever on a stalled
        # server; Timeout is a RequestException, so it is reported below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise exception for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if not textarea:
        print("Could not find document content in the page")
        return {}

    data = {}
    for line in textarea.text.split('\n'):
        match = pattern.match(line.strip())
        if not match:
            continue
        thing_a, thing_b, number_str = match.groups()
        if not thing_a or not thing_b:
            continue
        if len(thing_a) > 40 or len(thing_b) > 40:
            continue
        try:
            number = float(number_str)
        except ValueError:
            # e.g. "1.2.3" or "..." satisfy the regex but are not numbers
            continue
        if not (0 <= number <= 5):
            continue
        key = (thing_a, thing_b)
        # Every work starts from the seed ratings [0, 5] while its vote
        # count starts at 0 -- presumably deliberate smoothing so that works
        # with few votes stay near the midpoint; confirm with the author.
        nums, pop = data.get(key, ([0, 5], 0))
        data[key] = (nums + [number], pop + 1)

    # Calculate averages
    return {key: (sum(nums) / len(nums), pop) for key, (nums, pop) in data.items()}
def process_main(url):
    """Read the existing rankings table from the main wiki page.

    Fetches the edit view at ``url``, reads the wikitext out of the
    ``wpTextbox1`` textarea, and splits the wikitable into rows and cells.
    Each usable row contributes its author, work (optionally written as an
    external link ``[url name]``), tuning, notes and pop columns.

    Args:
        url: Address of the page's ``action=edit`` view.

    Returns:
        A dict mapping ``(author, name)`` to ``(tuning, notes, pop, link)``.
        ``link`` is ``""`` when the work cell is not a bracketed link.
        ``pop`` is the cell text as found, or an ``int`` once a duplicate
        row has replaced an earlier one.  When the same ``(author, name)``
        appears more than once, the row with the larger pop is kept.  An
        empty dict is returned when the request fails or the textarea
        cannot be found.
    """
    try:
        # A timeout keeps the script from hanging forever on a stalled
        # server; Timeout is a RequestException, so it is reported below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure the request was successful
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if not textarea:
        print("Could not find document content in the page")
        return {}
    raw_text = textarea.get_text()

    # Turn every line-leading "|" into a "||" cell separator so a table row
    # becomes one run of "||"-separated cells, then cut at the "|-" markers.
    processed_text = raw_text.replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}
    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue
        # Split the entry into parts, considering possible spaces around ||
        parts = re.split(r'\s*\|\|\s*', entry)
        if parts[-1] == "}":
            # The last row carries the table's closing "|}" with it.
            parts = parts[:-1]
        if parts[0] == "" and len(parts) == 7:
            # Normal row: an empty piece before the first "||", then 6 cells.
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        # Extract AUTHOR from the first part
        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the second part into LINK and NAME.  startswith/endswith
        # are used so that an empty cell does not raise IndexError.
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()

        # Debug trace of every parsed row.
        print("AUTHOR, NAME, LINK, TUNING")
        print(author, name, link, tuning)
        print()

        # pop is collected in case the user puts in a new entry at the top
        # accidentally trashing old tuning and notes
        key = (author, name)
        if key not in result:
            result[key] = (tuning, notes, pop, link)
            continue

        try:
            old_pop = int(result[key][2])
        except ValueError:
            # The stored pop is not a number, so the new row wins outright.
            result[key] = (tuning, notes, pop, link)
            continue
        try:
            new_pop = int(pop)
        except ValueError:
            # The new row's pop is not a number, so keep the stored row.
            continue
        if old_pop < new_pop:
            result[key] = (tuning, notes, new_pop, link)

    return result
# Edit-view URLs: the talk page supplies the ratings, the main page supplies
# the table whose tuning/notes/link columns are carried over.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"

averages = process_talk(talk)
data = process_main(main)

# Wikitable opening and header row.
print('''
{| class="wikitable" style="margin: auto;"
! Creator !! Work !! Tuning !! Notes !! Rating !! Pop ''')

# Highest average rating first; equal ratings keep their original order.
ranked = sorted(averages.items(), key=lambda item: item[1][0], reverse=True)
for (author, work), (rating, pop) in ranked:
    # Works that are rated but absent from the main table get blank columns.
    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
    row = (
        f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || {rating:.2f} || {pop}"
        if link
        else f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}"
    )
    print(row)

# Wikitable closing.
print("|}")