User:Hkm/Rankings program: Difference between revisions

From Xenharmonic Wiki
Jump to navigation Jump to search
Hkm (talk | contribs)
No edit summary
Hkm (talk | contribs)
No edit summary
Line 7: Line 7:
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
import pyperclip
import pyperclip
def extract_display_text(wiki_text):
    # Process internal links: [[Page|Display]] → Display (or Page if no Display)
    processed_text = re.sub(
        r'\[\[(.*?)(?:\|(.*?))?\]\]',
        lambda m: m.group(2) if m.group(2) else m.group(1),
        wiki_text
    )
   
    # Process external links: [URL Display] → Display (remove if no Display)
    processed_text = re.sub(
        r'\[(https?://\S+)(?:\s+([^\]]+))?\]',
        lambda m: m.group(2) if m.group(2) else '',
        processed_text
    )
   
    return processed_text


def process_talk(url):
def process_talk(url):
Line 127: Line 144:
              
              
             # Split the second part into LINK and NAME
             # Split the second part into LINK and NAME
             link_name = parts[1].strip()
             print(parts)
             if link_name[0] == "[" and link_name[-1] == "]":
            link = parts[1].strip()
                link_name = link_name[1:-1]
             name = extract_display_text(link)
                split_link_name = link_name.split(maxsplit=1)
            print("LINK " + link + ". NAME: " + name) ##################
                if len(split_link_name) < 2:
                    continue
                link, name = split_link_name[0], split_link_name[1]
            else:
                name = link_name
                link = ""
      
      
             tuning = parts[2].strip()
             tuning = parts[2].strip()
             notes = parts[3].strip()
             notes = parts[3].strip()
             pop = parts[5].strip()
             pop = parts[5].strip()
            #print("AUTHOR, NAME, LINK, TUNING") ###############################3
            #print(author, name, link, tuning)
            #print()
              
              
            # pop is collected in case the user puts in a new entry at the top
            # accidentally trashing old tuning and notes
   
             # Add to the result dictionary
             # Add to the result dictionary
             if (author, name) in result:
             if (author, name) in result:
Line 164: Line 167:
                     continue
                     continue


                '''
                 if old_pop < new_pop:
                 if old_pop < new_pop:
                     result[(author, name)] = (tuning, notes, pop, link)
                     result[(author, name)] = (tuning, notes, pop, link)
                 else:
                 else:
                     continue
                     continue
                '''
             else:
             else:
                 result[(author, name)] = (tuning, notes, pop, link)
                 result[(author, name)] = (tuning, notes, pop, link)
Line 180: Line 185:


talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"
averages = process_talk(talk)
averages = process_talk(talk)
data, before, after = process_main(main)
data, before, after = process_main(main)
Line 193: Line 198:
# print from talk page
# print from talk page
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
     tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
     tuning, notes, _, link = data.setdefault((author, work), ("", "", "", work))
     if link:
     clip += (f"|-\n| {author} || {link} || {tuning} || {notes} || {rating:.2f} || {pop}\n")
        clip += (f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || {rating:.2f} || {pop}\n")
    else:
        clip += (f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}\n")


# print main pages not in talk page
# print main pages not in talk page
Line 204: Line 206:
         (author, work) = i
         (author, work) = i
         tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
         tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
         if link:
         clip += (f"|-\n| {author} || {link} || {tuning} || {notes} || || \n")
            clip += (f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || || \n")
 
        else:
            clip += (f"|-\n| {author} || {work} || {tuning} || {notes} || || \n")
          
          
clip += "|}"
clip += "|}"

Revision as of 20:40, 29 March 2025

'''When adding a new work, put it at the bottom of the table so that it does
not replace the tuning and notes of an entry for that work that is already there.'''
import re
import requests
from bs4 import BeautifulSoup
import pyperclip

def extract_display_text(wiki_text):
    """Return *wiki_text* with wiki link markup reduced to its visible text.

    Internal links ``[[Page|Display]]`` become ``Display`` (or ``Page`` when
    no display text is given). External links ``[URL Display]`` become
    ``Display``; an external link without display text is removed entirely.
    Text outside of links is returned unchanged.
    """
    def internal_label(match):
        # Prefer the piped label; fall back to the page title.
        page, label = match.groups()
        return label or page

    def external_label(match):
        # A bare "[URL]" has no visible label, so it collapses to nothing.
        return match.group(2) or ''

    # Internal links are resolved first, then external links in the result.
    without_internal = re.sub(
        r'\[\[(.*?)(?:\|(.*?))?\]\]', internal_label, wiki_text
    )
    return re.sub(
        r'\[(https?://\S+)(?:\s+([^\]]+))?\]', external_label, without_internal
    )

def process_talk(url):
    """Fetch the ratings talk page and average the ratings given to each work.

    The page at *url* is expected to be a MediaWiki edit page; its wikitext is
    read from the ``wpTextbox1`` textarea. Every line of the form
    ``AUTHOR: WORK. RATING`` counts as one rating, provided that the author is
    at most 40 characters, the work is non-empty and at most 100 characters,
    and the rating is a number between 0 and 5 inclusive. All other lines are
    ignored.

    Each average includes two built-in pseudo-ratings, 0 and 5, in addition
    to the submitted ones, which pulls works with few ratings toward 2.5.

    Returns:
        A dict mapping ``(author, work)`` to ``(average, count)``, where
        ``count`` is the number of submitted ratings (the pseudo-ratings are
        not counted). An empty dict is returned if the page cannot be fetched
        or the textarea is missing.
    """
    rating_line = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')

    # Only the network call can raise RequestException, so only it is guarded.
    # The timeout keeps a stalled connection from hanging the script forever.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise exception for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if not textarea:
        print("Could not find document content in the page")
        return {}

    ratings = {}
    for line in textarea.text.split('\n'):
        match = rating_line.match(line.strip())
        if not match:
            continue

        author, work, number_str = match.groups()
        if not author or not work:
            continue
        if len(author) > 40 or len(work) > 100:
            continue

        # "[0-9.]+" also matches strings such as "1.2.3", which float() rejects.
        try:
            number = float(number_str)
        except ValueError:
            continue
        if not (0 <= number <= 5):
            continue

        ratings.setdefault((author, work), []).append(number)

    # Average over the two pseudo-ratings plus the submitted ratings.
    return {
        key: (sum([0, 5] + nums) / (len(nums) + 2), len(nums))
        for key, nums in ratings.items()
    }

def _refresh_update_stamp(after):
    """Return *after* with a fresh "Last updated by" signature line on top.

    One leading newline is dropped first. If the first line of what remains
    is an existing "Last updated by" stamp, it is replaced; otherwise the new
    stamp is placed in front of the text.
    """
    stamp = "\nLast updated by ~~~~.\n"
    if after.startswith("\n"):
        after = after[1:]

    # partition() also handles a stamp that is the very last line of the page
    # (no trailing newline), as well as an empty string.
    first_line, _, rest = after.partition("\n")
    if first_line.startswith("Last updated by"):
        return stamp + rest
    return stamp + after


def _parse_table_rows(table_text):
    """Parse the wikitable body into ``{(author, name): (tuning, notes, pop, link)}``.

    ``link`` is the raw wikitext of the second column and ``name`` is its
    display text (see ``extract_display_text``). When the same
    ``(author, name)`` occurs more than once, the first row is kept unless
    its count column is not an integer, in which case the later row replaces it.
    """
    # Replace newline followed by | with ||, so that row separators ("|-")
    # become "||-" and every cell of a row is delimited by "||".
    flattened = table_text.replace('\n|', '||')

    rows = {}
    for entry in re.split(r'\|\|-\s*', flattened):
        entry = entry.strip()
        if not entry.startswith('|'):
            continue

        # Split the entry into parts, considering possible spaces around ||
        parts = re.split(r'\s*\|\|\s*', entry)

        if parts[-1] == "}":
            parts = parts[:-1]

        # A well-formed row is an empty leading part followed by six columns.
        if parts[0] == "" and len(parts) == 7:
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
        if len(parts) < 6:
            continue  # Not enough columns

        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Debug traces kept from the original implementation.
        print(parts)
        link = parts[1].strip()
        name = extract_display_text(link)
        print("LINK " + link + ". NAME: " + name) ##################

        row = (parts[2].strip(), parts[3].strip(), parts[5].strip(), link)
        key = (author, name)

        if key not in rows:
            rows[key] = row
            continue

        # Duplicate entry: keep the earlier row unless its count is unusable.
        try:
            int(rows[key][2])
        except ValueError:
            rows[key] = row

    return rows


def process_main(url):
    """Fetch the main ratings page and split it around its first wikitable.

    The page at *url* is expected to be a MediaWiki edit page; its wikitext is
    read from the ``wpTextbox1`` textarea and split at the first ``{|`` and
    the first following ``|}``.

    Returns:
        A tuple ``(data, before, after)``: ``data`` maps ``(author, name)`` to
        ``(tuning, notes, pop, link)`` for each table row, ``before`` is the
        wikitext preceding the table, and ``after`` is the wikitext following
        it with a fresh "Last updated by ~~~~." line at the top.

    Raises:
        requests.RequestException: if the page cannot be fetched (the error
            is also printed).
        ValueError: if the page has no ``wpTextbox1`` textarea or no table.

    Failures raise instead of returning a partial result so that the caller
    never builds replacement wikitext from an empty page.
    """
    # The timeout keeps a stalled connection from hanging the script forever.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure the request was successful
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        raise

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if not textarea:
        raise ValueError("Could not find document content in the page")

    before, opener, remainder = textarea.get_text().partition('{|')
    table_text, closer, after = remainder.partition('|}')
    if not opener or not closer:
        raise ValueError("Could not find a wikitable ({| ... |}) in the page")

    return _parse_table_rows(table_text), before, _refresh_update_stamp(after)
        



# Source pages: ratings are read from the talk page, table rows from the list page.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"
averages = process_talk(talk)
data, before, after = process_main(main)

# The new page text is collected as fragments and joined once at the end.
pieces = [before]

pieces.append('''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
! Creator !! Work !! Tuning !! Notes !! R !! #

''')

# Rated works first, highest average rating on top.
ranked = sorted(averages.items(), key=lambda item: item[1][0], reverse=True)
for (author, work), (rating, pop) in ranked:
    # A rated work missing from the table gets an entry with the bare work
    # name in the link column and empty tuning/notes.
    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", work))
    pieces.append(
        f"|-\n| {author} || {link} || {tuning} || {notes} || {rating:.2f} || {pop}\n"
    )

# Then the table rows that have no ratings on the talk page.
for key, (tuning, notes, _, link) in data.items():
    if key in averages:
        continue
    author, work = key
    pieces.append(f"|-\n| {author} || {link} || {tuning} || {notes} || || \n")

pieces.append("|}")
pieces.append(after)

clip = "".join(pieces)
pyperclip.copy(clip)