User:Hkm/Rankings program: Difference between revisions

From Xenharmonic Wiki
Jump to navigation Jump to search
Hkm (talk | contribs)
Created page with "‎<nowiki><syntaxhighlight lang="python" line></nowiki> ‎<nowiki></syntaxhighlight></nowiki>"
 
Hkm (talk | contribs)
No edit summary
Line 1: Line 1:
‎<nowiki><syntaxhighlight lang="python" line></nowiki>
‎<nowiki><syntaxhighlight lang="python" line></nowiki>
import re
import requests
from bs4 import BeautifulSoup
def process_talk(url):
    """Download a wiki edit page and average the ratings listed in its wikitext.

    The wikitext is read from the page's ``wpTextbox1`` textarea. Every line
    shaped like ``<thing_a>: <thing_b>. <number>`` contributes one rating to
    the key ``(thing_a, thing_b)``. A line is ignored when it does not match
    that shape, when either name is empty or longer than 40 characters, or
    when the number is not a float within ``[0, 5]``.

    Each key is seeded with the two values 0 and 5 before any real rating is
    added, so the reported average is taken over the seeds plus the ratings.

    Args:
        url: Address of the edit page to fetch.

    Returns:
        A dict mapping ``(thing_a, thing_b)`` to ``(average, count)``, where
        ``count`` is the number of real ratings (seeds excluded). An empty
        dict is returned when the request fails or the textarea is missing.
    """
    pattern = re.compile(r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$')
    seeds = [0, 5]

    # Only the network call can raise RequestException, so keep the try small.
    # The timeout stops a stalled server from hanging the script forever;
    # requests.Timeout is a RequestException and is reported like other errors.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise exception for HTTP errors
        html = response.text
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(html, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if not textarea:
        print("Could not find document content in the page")
        return {}

    scores = {}
    for line in textarea.text.split('\n'):
        match = pattern.match(line.strip())
        if not match:
            continue

        thing_a, thing_b, number_str = match.groups()
        if not thing_a or not thing_b:
            continue
        if len(thing_a) > 40 or len(thing_b) > 40:
            continue

        # "[0-9.]+" also matches strings such as "1.2.3", which float() rejects.
        try:
            number = float(number_str)
        except ValueError:
            continue
        if not (0 <= number <= 5):
            continue

        scores.setdefault((thing_a, thing_b), list(seeds)).append(number)

    # Average over seeds + ratings; the count reports real ratings only.
    return {
        key: (sum(values) / len(values), len(values) - len(seeds))
        for key, values in scores.items()
    }
def process_main(url):
    """Download a wiki edit page and parse its rankings table into a dict.

    The wikitext is read from the page's ``wpTextbox1`` textarea. Table rows
    are expected to have six ``||``-separated cells; the cells at positions
    0, 1, 2, 3 and 5 are read as author, work, tuning, notes and pop (the
    cell at position 4 is ignored). A work cell written as ``[link name]`` is
    split into its link and name; otherwise the whole cell is the name and
    the link is empty.

    When the same ``(author, name)`` appears more than once, the row with the
    larger integer pop is kept, so that a fresh row added by accident does
    not trash the older tuning and notes. If the stored pop is not an
    integer the newer row replaces it; if the newer pop is not an integer the
    stored row is kept.

    Args:
        url: Address of the edit page to fetch.

    Returns:
        A dict mapping ``(author, name)`` to ``(tuning, notes, pop, link)``.
        An empty dict is returned when the request fails or the textarea is
        missing.
    """
    # Only the network call can raise RequestException, so keep the try small.
    # The timeout stops a stalled server from hanging the script forever.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure the request was successful
        html = response.text
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(html, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if not textarea:
        print("Could not find document content in the page")
        return {}
    raw_text = textarea.get_text()

    # Fold every line that starts with "|" onto the previous one using "||",
    # so that each table row becomes a single "||"-separated string, then
    # split the rows apart on the "|-" row markers (now "||-").
    processed_text = raw_text.replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}
    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue

        # Split the entry into cells, allowing spaces around "||"
        parts = re.split(r'\s*\|\|\s*', entry)
        if parts[-1] == "}":
            # The table terminator "|}" ends up glued to the last row.
            parts = parts[:-1]
        if parts[0] == "" and len(parts) == 7:
            # A normal row: empty leading cell followed by the six columns.
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the work cell into LINK and NAME. startswith/endswith are
        # used instead of indexing so an empty cell does not raise IndexError.
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()
        print("AUTHOR, NAME, LINK, TUNING") ###############################3
        print(author, name, link, tuning)
        print()

        key = (author, name)
        if key not in result:
            result[key] = (tuning, notes, pop, link)
            continue

        # Duplicate row: decide which one to keep by comparing pop values.
        try:
            old_pop = int(result[key][2])
        except ValueError:
            # The stored pop is unusable, so the newer row wins outright.
            result[key] = (tuning, notes, pop, link)
            continue

        try:
            pop = int(pop)
        except ValueError:
            # The newer pop is unusable, so keep what is already stored.
            continue

        if old_pop < pop:
            result[key] = (tuning, notes, pop, link)
    return result
       
# Edit-view URLs: the talk page is fed to process_talk, the main page to
# process_main.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"
averages = process_talk(talk)
data = process_main(main)

# Opening of the generated wikitable, followed by its header row.
print('''
{| class="wikitable" style="margin: auto;"
! Creator !! Work !! Tuning !! Notes !! Rating !! Pop ''')

# One table row per rated work, highest average rating first. Works that have
# ratings but no row in the main table get blank tuning/notes/link cells.
for entry in sorted(averages.items(), key=lambda item: item[1][0], reverse=True):
    (author, work), (rating, pop) = entry
    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
    if not link:
        print(f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}")
    else:
        print(f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || {rating:.2f} || {pop}")

# Close the wikitable.
print("|}")


‎<nowiki></syntaxhighlight></nowiki>
‎<nowiki></syntaxhighlight></nowiki>

Revision as of 03:19, 14 March 2025

‎<syntaxhighlight lang="python" line>

import re
import requests
from bs4 import BeautifulSoup

def process_talk(url):
    """Download a wiki edit page and average the ratings listed in its wikitext.

    The wikitext is read from the page's ``wpTextbox1`` textarea. Every line
    shaped like ``<thing_a>: <thing_b>. <number>`` contributes one rating to
    the key ``(thing_a, thing_b)``. A line is ignored when it does not match
    that shape, when either name is empty or longer than 40 characters, or
    when the number is not a float within ``[0, 5]``.

    Each key is seeded with the two values 0 and 5 before any real rating is
    added, so the reported average is taken over the seeds plus the ratings.

    Args:
        url: Address of the edit page to fetch.

    Returns:
        A dict mapping ``(thing_a, thing_b)`` to ``(average, count)``, where
        ``count`` is the number of real ratings (seeds excluded). An empty
        dict is returned when the request fails or the textarea is missing.
    """
    pattern = re.compile(r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$')
    seeds = [0, 5]

    # Only the network call can raise RequestException, so keep the try small.
    # The timeout stops a stalled server from hanging the script forever;
    # requests.Timeout is a RequestException and is reported like other errors.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise exception for HTTP errors
        html = response.text
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(html, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if not textarea:
        print("Could not find document content in the page")
        return {}

    scores = {}
    for line in textarea.text.split('\n'):
        match = pattern.match(line.strip())
        if not match:
            continue

        thing_a, thing_b, number_str = match.groups()
        if not thing_a or not thing_b:
            continue
        if len(thing_a) > 40 or len(thing_b) > 40:
            continue

        # "[0-9.]+" also matches strings such as "1.2.3", which float() rejects.
        try:
            number = float(number_str)
        except ValueError:
            continue
        if not (0 <= number <= 5):
            continue

        scores.setdefault((thing_a, thing_b), list(seeds)).append(number)

    # Average over seeds + ratings; the count reports real ratings only.
    return {
        key: (sum(values) / len(values), len(values) - len(seeds))
        for key, values in scores.items()
    }

def process_main(url):
    """Download a wiki edit page and parse its rankings table into a dict.

    The wikitext is read from the page's ``wpTextbox1`` textarea. Table rows
    are expected to have six ``||``-separated cells; the cells at positions
    0, 1, 2, 3 and 5 are read as author, work, tuning, notes and pop (the
    cell at position 4 is ignored). A work cell written as ``[link name]`` is
    split into its link and name; otherwise the whole cell is the name and
    the link is empty.

    When the same ``(author, name)`` appears more than once, the row with the
    larger integer pop is kept, so that a fresh row added by accident does
    not trash the older tuning and notes. If the stored pop is not an
    integer the newer row replaces it; if the newer pop is not an integer the
    stored row is kept.

    Args:
        url: Address of the edit page to fetch.

    Returns:
        A dict mapping ``(author, name)`` to ``(tuning, notes, pop, link)``.
        An empty dict is returned when the request fails or the textarea is
        missing.
    """
    # Only the network call can raise RequestException, so keep the try small.
    # The timeout stops a stalled server from hanging the script forever.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure the request was successful
        html = response.text
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(html, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if not textarea:
        print("Could not find document content in the page")
        return {}
    raw_text = textarea.get_text()

    # Fold every line that starts with "|" onto the previous one using "||",
    # so that each table row becomes a single "||"-separated string, then
    # split the rows apart on the "|-" row markers (now "||-").
    processed_text = raw_text.replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}
    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue

        # Split the entry into cells, allowing spaces around "||"
        parts = re.split(r'\s*\|\|\s*', entry)
        if parts[-1] == "}":
            # The table terminator "|}" ends up glued to the last row.
            parts = parts[:-1]
        if parts[0] == "" and len(parts) == 7:
            # A normal row: empty leading cell followed by the six columns.
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the work cell into LINK and NAME. startswith/endswith are
        # used instead of indexing so an empty cell does not raise IndexError.
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()
        print("AUTHOR, NAME, LINK, TUNING") ###############################3
        print(author, name, link, tuning)
        print()

        key = (author, name)
        if key not in result:
            result[key] = (tuning, notes, pop, link)
            continue

        # Duplicate row: decide which one to keep by comparing pop values.
        try:
            old_pop = int(result[key][2])
        except ValueError:
            # The stored pop is unusable, so the newer row wins outright.
            result[key] = (tuning, notes, pop, link)
            continue

        try:
            pop = int(pop)
        except ValueError:
            # The newer pop is unusable, so keep what is already stored.
            continue

        if old_pop < pop:
            result[key] = (tuning, notes, pop, link)
    return result
       


# Edit-view URLs: the talk page is fed to process_talk, the main page to
# process_main.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"
averages = process_talk(talk)
data = process_main(main)

# Opening of the generated wikitable, followed by its header row.
print('''
{| class="wikitable" style="margin: auto;"
! Creator !! Work !! Tuning !! Notes !! Rating !! Pop ''')

# One table row per rated work, highest average rating first. Works that have
# ratings but no row in the main table get blank tuning/notes/link cells.
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
    if link:
        print(f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || {rating:.2f} || {pop}")
    else:
        print(f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}")

# Close the wikitable.
print("|}")


‎</syntaxhighlight>