User:Hkm/Rankings program: Difference between revisions

Revision as of 03:19, 14 March 2025

<syntaxhighlight lang="python" line>

import re

import requests
from bs4 import BeautifulSoup

def process_talk(url, timeout=30):
    """Fetch the ratings talk page and average the ratings given to each work.

    The page is requested through its edit URL so the raw wikitext can be
    read out of the ``wpTextbox1`` textarea. Each line shaped like
    ``Creator: Work. 4.5`` contributes one rating; every other line is
    ignored, as are ratings outside 0-5 and names longer than 40 characters.

    Args:
        url: Edit-page URL of the talk page that holds the ratings.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        A dict mapping ``(creator, work)`` to ``(average, count)``.
        ``average`` is the mean of the submitted ratings together with the
        two seed values 0 and 5; ``count`` is the number of submitted
        ratings only. An empty dict is returned when the page cannot be
        fetched or contains no edit textarea.
    """
    # "<creator>: <work>. <number>" -- the work title may not contain a period.
    pattern = re.compile(r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$')
    # Every work starts from one 0 and one 5 so that a single vote cannot
    # push its average all the way to either extreme.
    seed = (0, 5)

    # Only the request itself can raise RequestException, so keep the try
    # block limited to it.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if textarea is None:
        print("Could not find document content in the page")
        return {}

    ratings = {}
    for line in textarea.text.split('\n'):
        match = pattern.match(line.strip())
        if not match:
            continue

        creator, work, number_str = match.groups()
        if not creator or not work:
            continue
        if len(creator) > 40 or len(work) > 40:
            continue

        try:
            number = float(number_str)
        except ValueError:
            # The pattern also lets through things like "1.2.3".
            continue
        if not 0 <= number <= 5:
            continue

        ratings.setdefault((creator, work), list(seed)).append(number)

    # Calculate averages; the vote count excludes the seed values.
    return {
        key: (sum(nums) / len(nums), len(nums) - len(seed))
        for key, nums in ratings.items()
    }

def process_main(url, timeout=30):  # i'm sorry you have to deal with this code
    """Fetch the rankings page and collect the hand-edited columns of its table.

    The page is requested through its edit URL and the raw wikitext is read
    from the ``wpTextbox1`` textarea. Each table row is expected to hold six
    cells: author, work (optionally written as ``[link name]``), tuning,
    notes, rating and pop.

    Args:
        url: Edit-page URL of the page that holds the rankings table.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        A dict mapping ``(author, name)`` to ``(tuning, notes, pop, link)``.
        ``pop`` is the text of the pop cell and ``link`` is ``""`` when the
        work cell is not a bracketed link. When the same ``(author, name)``
        occurs more than once, the row with the larger numeric pop is kept.
        An empty dict is returned when the page cannot be fetched or
        contains no edit textarea.
    """
    try:
        # Fetch the page content
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Ensure the request was successful
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if textarea is None:
        print("Could not find document content in the page")
        return {}

    raw_text = textarea.get_text()

    # Preprocess the text: replace newline followed by | with ||, so cells
    # written on separate lines look like inline cells, then split into rows
    # on the "|-" row separator (which has become "||-").
    processed_text = raw_text.replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}

    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue  # table header / text outside the rows

        # Split the entry into parts, considering possible spaces around ||
        parts = re.split(r'\s*\|\|\s*', entry)

        # The last row carries the table terminator "|}" as a trailing cell.
        if parts[-1] == "}":
            parts = parts[:-1]

        # A normal row is an empty leading part followed by the six cells.
        if parts[0] == "" and len(parts) == 7:
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        # Extract AUTHOR from the first part
        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the second part into LINK and NAME. startswith/endswith are
        # used instead of indexing so an empty cell does not raise IndexError.
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()

        # Debug trace of every parsed row.
        print("AUTHOR, NAME, LINK, TUNING") ###############################3
        print(author, name, link, tuning)
        print()

        # pop is collected in case the user puts in a new entry at the top
        # accidentally trashing old tuning and notes

        key = (author, name)
        row = (tuning, notes, pop, link)

        if key not in result:
            result[key] = row
            continue

        # Duplicate row: keep whichever one has the larger numeric pop.
        try:
            old_pop = int(result[key][2])
        except ValueError:
            # The stored pop is unusable, so the new row wins outright.
            result[key] = row
            continue

        try:
            new_pop = int(pop)
        except ValueError:
            continue  # the new pop is unusable, keep the stored row

        if old_pop < new_pop:
            result[key] = row

    return result

# Edit-page URLs: the raw wikitext is scraped from the edit form.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"
averages = process_talk(talk)
data = process_main(main)

# Emit the regenerated table as wikitext on stdout.
# NOTE(review): the original header literal was swallowed when the wiki
# rendered it as a table; only the six column titles survived. The table
# attributes below are an assumption -- confirm against the live page.
# The header is printed piecewise so no source line starts with wiki table
# markup.
columns = ("Creator", "Work", "Tuning", "Notes", "Rating", "Pop")
print('{| class="wikitable"')
print("\n".join(f"! {column}" for column in columns))

# Highest average rating first.
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
    # Tuning, notes and link are carried over from the existing table; works
    # that are rated but not yet listed get empty cells.
    tuning, notes, _, link = data.get((author, work), ("", "", "", ""))
    work_cell = f"[{link} {work}]" if link else work
    print(f"|-\n| {author} || {work_cell} || {tuning} || {notes} || {rating:.2f} || {pop}")

print("|}")

</syntaxhighlight>

User:Hkm/Rankings program: Difference between revisions

Revision as of 03:19, 14 March 2025

Navigation menu

Search