# User:Hkm/Rankings program

import re
import requests
from bs4 import BeautifulSoup

def process_talk(url, timeout=30):
    """Collect ratings from a wiki edit page and average them per item.

    The page at ``url`` is fetched and the wikitext inside the
    ``<textarea id="wpTextbox1">`` element is scanned line by line. A line
    counts as a rating when, after stripping, it has the form
    ``<thing_a>: <thing_b>. <number>`` with both names non-empty and at most
    40 characters long, and the number in the range 0..5.

    Args:
        url: Address of the wiki page in edit mode.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        A dict mapping ``(thing_a, thing_b)`` to ``(average, count)`` where
        ``count`` is the number of rating lines found for that pair. An
        empty dict is returned when the request fails or the textarea is
        missing (a message is printed in both cases).
    """
    pattern = re.compile(r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$')

    # Only the network calls can raise RequestException, so keep the try small.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if textarea is None:
        print("Could not find document content in the page")
        return {}

    ratings = {}
    for line in textarea.text.split('\n'):
        match = pattern.match(line.strip())
        if not match:
            continue

        thing_a, thing_b, number_str = match.groups()

        if not thing_a or not thing_b:
            continue
        if len(thing_a) > 40 or len(thing_b) > 40:
            continue

        # The character class [0-9.]+ also admits strings such as "1.2.3".
        try:
            number = float(number_str)
        except ValueError:
            continue
        if not (0 <= number <= 5):
            continue

        # Every item starts with the two pseudo-ratings 0 and 5, so its
        # average begins at 2.5 (presumably deliberate smoothing for items
        # with few votes -- confirm with the author before changing).
        ratings.setdefault((thing_a, thing_b), [0, 5]).append(number)

    # The two seed values take part in the average but not in the count.
    return {
        key: (sum(nums) / len(nums), len(nums) - 2)
        for key, nums in ratings.items()
    }

def process_main(url, timeout=30):
    """Parse the rankings wikitable on a wiki edit page into a dict.

    The page at ``url`` is fetched and the wikitext inside the
    ``<textarea id="wpTextbox1">`` element is split into table rows. Each
    row is expected to hold six cells; cells 0-3 and 5 are read as author,
    work (either ``[link name]`` or a plain name), tuning, notes and pop.
    Cell 4 is not used here.

    When the same ``(author, name)`` appears more than once, the row with
    the larger integer pop is kept (the first one wins on a tie). An earlier
    row whose pop is not an integer is replaced by the later row; a later
    row whose pop is not an integer is ignored.

    Args:
        url: Address of the wiki page in edit mode.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        A dict mapping ``(author, name)`` to ``(tuning, notes, pop, link)``.
        ``link`` is ``""`` for rows without a bracketed link. ``pop`` is the
        raw cell text, or an ``int`` when the row replaced an earlier
        duplicate. An empty dict is returned when the request fails or the
        textarea is missing (a message is printed in both cases).
    """
    # Only the network calls can raise RequestException, so keep the try small.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Ensure the request was successful
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if textarea is None:
        print("Could not find document content in the page")
        return {}

    # Turn every "newline + |" into "||" so that row separators become
    # "||-" and each cell of a row is delimited by "||", then cut the text
    # at the row separators.
    processed_text = textarea.get_text().replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}

    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue  # Table header or other non-row text

        # Split the entry into parts, considering possible spaces around ||
        parts = re.split(r'\s*\|\|\s*', entry)

        # The table terminator "|}" leaves a lone "}" after the final row.
        if parts[-1] == "}":
            parts = parts[:-1]

        # A well-formed row is an empty leading part followed by six cells.
        if parts[0] == "" and len(parts) == 7:
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        # Extract AUTHOR from the first part
        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the second part into LINK and NAME. startswith/endswith are
        # used instead of indexing so an empty cell cannot raise IndexError.
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()

        # NOTE(review): debugging output, kept unchanged. It goes to the same
        # stdout as any table the caller prints afterwards.
        print("AUTHOR, NAME, LINK, TUNING") ###############################3
        print(author, name, link, tuning)
        print()

        key = (author, name)
        if key not in result:
            result[key] = (tuning, notes, pop, link)
            continue

        # Duplicate row. pop decides which one to keep, in case the user put
        # in a new entry at the top, accidentally trashing old tuning and
        # notes.
        try:
            old_pop = int(result[key][2])
        except ValueError:
            # The stored pop is unusable, so the newer row takes its place.
            result[key] = (tuning, notes, pop, link)
            continue

        try:
            new_pop = int(pop)
        except ValueError:
            continue  # Nothing to compare with; keep the stored row

        if old_pop < new_pop:
            result[key] = (tuning, notes, new_pop, link)

    return result
        



# Both pages are requested in edit mode; the parsers read the wikitext from
# the edit form's textarea.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"

# Ratings come from the talk page, descriptive columns from the main page.
averages = process_talk(talk)
data = process_main(main)

# Opening of the generated wikitable and its header row.
print('\n{| class="wikitable" style="margin: auto;"\n! Creator !! Work !! Tuning !! Notes !! Rating !! Pop ')

# One table row per rated work, best average first. reverse=True keeps the
# sort stable, so equally rated works stay in their original order.
for (author, work), (rating, pop) in sorted(
    averages.items(), key=lambda item: item[1][0], reverse=True
):
    # Works that are rated but absent from the main table get blank cells.
    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
    if not link:
        print(f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}")
        continue
    print(f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || {rating:.2f} || {pop}")

# Close the wikitable.
print("|}")


‎
# User:Hkm/Rankings program

# Navigation menu

# Search