User:Hkm/Rankings program: Difference between revisions
< User:Hkm
No edit summary |
No edit summary |
||
| Line 9: | Line 9: | ||
def process_talk(url): | def process_talk(url): | ||
#pattern= r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$' | |||
pattern = r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$' | pattern = r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$' | ||
data = {} | data = {} | ||
| Line 39: | Line 40: | ||
continue | continue | ||
if len(thing_a) > 40 or len(thing_b) > | if len(thing_a) > 40 or len(thing_b) > 100: | ||
continue | continue | ||
| Line 79: | Line 80: | ||
(before, after) = raw_text.split('{|', 1) | (before, after) = raw_text.split('{|', 1) | ||
(raw_text, after) = after.split('|}', 1) | (raw_text, after) = after.split('|}', 1) | ||
if after[0] == "\n": | |||
after = after[1:] | |||
# add "last updated by" | |||
index = after.find('\n') | |||
first_part = after[:index + 1] | |||
rest_part = after[index + 1:] | |||
if first_part.startswith("Last updated by"): | |||
new_first = "\nLast updated by ~~~~.\n" | |||
after = new_first + rest_part | |||
else: | |||
after = "\nLast updated by ~~~~.\n" + after | |||
# Preprocess the text: replace newline followed by | with ||, then split into entries | # Preprocess the text: replace newline followed by | with ||, then split into entries | ||
| Line 172: | Line 186: | ||
clip = before | clip = before | ||
clip += '''{| class="wikitable sortable" style="margin: auto;" | clip += '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;" | ||
! Creator !! Work !! Tuning !! Notes !! R !! # | ! Creator !! Work !! Tuning !! Notes !! R !! # | ||
''' | ''' | ||
# print from talk page | |||
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]): | for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]): | ||
tuning, notes, _, link = data.setdefault((author, work), ("", "", "", "")) | tuning, notes, _, link = data.setdefault((author, work), ("", "", "", "")) | ||
| Line 184: | Line 199: | ||
clip += (f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}\n") | clip += (f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}\n") | ||
# print main pages not in talk page | |||
for i in data: | |||
if (i[0], i[1]) not in averages: | |||
(author, work) = i | |||
tuning, notes, _, link = data.setdefault((author, work), ("", "", "", "")) | |||
if link: | |||
clip += (f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || || \n") | |||
else: | |||
clip += (f"|-\n| {author} || {work} || {tuning} || {notes} || || \n") | |||
clip += "|}" | clip += "|}" | ||
clip += after | clip += after | ||
pyperclip.copy(clip) | pyperclip.copy(clip) | ||
</syntaxhighlight> | </syntaxhighlight> | ||
Revision as of 22:58, 27 March 2025
'''When adding a new work, the user should put it at the bottom of the table
so that it does not replace the tuning and notes if that work was already there.'''
import re
import requests
from bs4 import BeautifulSoup
import pyperclip
def process_talk(url, timeout=30):
    """Collect the ratings posted on the talk page and average them per work.

    Fetches ``url``, reads the wikitext out of the ``wpTextbox1`` textarea of
    the returned HTML, and scans it line by line for ratings written as
    ``Creator: Work. 4.5``.

    A line is ignored when it does not have that shape, when the work is
    empty, when the creator is longer than 40 characters or the work longer
    than 100, or when the rating is not a number between 0 and 5 inclusive.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping ``(creator, work)`` to ``(average, count)``, where
        ``count`` is the number of ratings found for that work.  Every work
        starts with two built-in ratings, 0 and 5, which are included in the
        average but not in ``count``.  An empty dict is returned when the page
        cannot be fetched or contains no ``wpTextbox1`` textarea.
    """
    # The greedy ``.*`` lets the work title itself contain periods; the rating
    # is whatever run of digits and dots ends the line.
    rating_line = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if not textarea:
        print("Could not find document content in the page")
        return {}

    scores_by_work = {}
    for line in textarea.text.split('\n'):
        match = rating_line.match(line.strip())
        if not match:
            continue
        creator, work, number_str = match.groups()
        if not creator or not work:
            continue
        if len(creator) > 40 or len(work) > 100:
            continue
        try:
            number = float(number_str)
        except ValueError:
            # e.g. "4.5.6" or "..." match the pattern but are not numbers
            continue
        if not (0 <= number <= 5):
            continue
        # Each work is seeded with the two built-in ratings 0 and 5.
        scores_by_work.setdefault((creator, work), [0, 5]).append(number)

    # Calculate averages; the two seed values are not counted as ratings.
    return {
        key: (sum(scores) / len(scores), len(scores) - 2)
        for key, scores in scores_by_work.items()
    }
def _parse_vote_count(text):
    """Return ``text`` converted to an int, or None when it is not a whole number."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def process_main(url, timeout=30):
    """Read the rankings table from the main page's edit form.

    Fetches ``url``, takes the wikitext from the ``wpTextbox1`` textarea and
    splits it into the text before the first ``{|``, the table itself, and the
    text after the following ``|}``.  The text after the table is given a
    fresh ``Last updated by ~~~~.`` first line, replacing an existing one if
    present.

    Each table row is expected to have six ``||``-separated cells.  The second
    cell is either a plain name or ``[link name]``; the sixth cell is kept as
    the row's count.  When the same ``(author, name)`` appears more than once,
    the row with the larger numeric count is kept; a row whose count is not a
    number loses to one whose count is.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``(result, before, after)`` where ``result`` maps ``(author, name)``
        to ``(tuning, notes, pop, link)`` (``pop`` is the text of the sixth
        cell, ``link`` is ``""`` for unlinked works), and ``before``/``after``
        are the page text surrounding the table.  An empty dict is returned
        instead when the page cannot be fetched, has no ``wpTextbox1``
        textarea, or contains no ``{| ... |}`` table.
    """
    try:
        # Fetch the page content
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Ensure the request was successful
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if not textarea:
        print("Could not find document content in the page")
        return {}

    before, table_open, remainder = textarea.get_text().partition('{|')
    raw_text, table_close, after = remainder.partition('|}')
    if not table_open or not table_close:
        print("Could not find a wiki table in the page")
        return {}

    if after.startswith("\n"):
        after = after[1:]
    # add "last updated by", replacing a previous stamp rather than stacking them
    stamp = "\nLast updated by ~~~~.\n"
    first_line, _, rest_part = after.partition('\n')
    if first_line.startswith("Last updated by"):
        after = stamp + rest_part
    else:
        after = stamp + after

    # Preprocess the text: replace newline followed by | with ||, then split into entries
    processed_text = raw_text.replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}
    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue  # table attributes / header line, not a data row

        # Split the entry into parts, considering possible spaces around ||
        parts = re.split(r'\s*\|\|\s*', entry)
        if parts[-1] == "}":
            parts = parts[:-1]
        if parts[0] == "" and len(parts) == 7:
            # A well-formed row starts with "||", leaving an empty first part.
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        # Extract AUTHOR from the first part
        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the second part into LINK and NAME
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()

        key = (author, name)
        record = (tuning, notes, pop, link)
        if key not in result:
            result[key] = record
            continue

        # pop is collected in case the user puts in a new entry at the top,
        # accidentally trashing old tuning and notes: of two rows for the same
        # work, keep the one with the larger count.
        old_pop = _parse_vote_count(result[key][2])
        new_pop = _parse_vote_count(pop)
        if old_pop is None:
            # The stored row has no usable count, so the later row wins.
            result[key] = record
        elif new_pop is not None and old_pop < new_pop:
            result[key] = record

    return result, before, after
# Edit-form URLs: the talk page holds the ratings, the main page holds the table.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"

averages = process_talk(talk)

# process_main returns an empty dict (not a 3-tuple) when it could not read the
# page; stop here with a clear message instead of failing on the unpacking
# below, and before anything is written to the clipboard.
main_result = process_main(main)
if not main_result:
    raise SystemExit("Could not read the main rankings page; nothing was copied to the clipboard.")
data, before, after = main_result


def _format_row(author, work, tuning, notes, link, score_cells):
    """Return one wikitable row; ``score_cells`` is the text of the last two cells."""
    work_cell = f"[{link} {work}]" if link else work
    return f"|-\n| {author} || {work_cell} || {tuning} || {notes} || {score_cells}\n"


table_header = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
! Creator !! Work !! Tuning !! Notes !! R !! #
'''

rows = []

# print from talk page, highest average rating first
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
    tuning, notes, _, link = data.get((author, work), ("", "", "", ""))
    rows.append(_format_row(author, work, tuning, notes, link, f"{rating:.2f} || {pop}"))

# print main pages not in talk page, with the rating and count cells left empty
for (author, work), (tuning, notes, _, link) in data.items():
    if (author, work) not in averages:
        rows.append(_format_row(author, work, tuning, notes, link, "|| "))

clip = "".join([before, table_header, *rows, "|}", after])
pyperclip.copy(clip)