User:Hkm/Rankings program: Difference between revisions

ArrowHead294 (talk | contribs)
mNo edit summary
Hkm (talk | contribs)
No edit summary
Line 1: Line 1:
<syntaxhighlight lang="python" line>
<syntaxhighlight lang="python" line="">
"""
Wiki Rating Processor
 
This script processes community ratings from a talk page and merges them with main list entries
from a xenharmonic wiki. Results are formatted into a sorted wikitable and copied to clipboard.
 
Disclaimer: a lot of this code is AI-generated, because I don't have that much time on my hands.
"""


'''when adding a new work, the user should put it at the bottom
so that it does not replace the tuning and notes if that work was already there'''
import re
import re
import requests
import requests
Line 8: Line 14:
import pyperclip
import pyperclip


def extract_display_text(wiki_text):
# Regular expressions precompiled for better performance
     # Process internal links: [[Page|Display]] → Display (or Page if no Display)
# Internal wiki link: [[Target]] or [[Target|Display]].
# Group 1 is the link target; group 2 is the display text and is None when
# the link has no "|" part.
WIKI_LINK_PATTERN = re.compile(r'\[\[(.*?)(?:\|(.*?))?\]\]')
     processed_text = re.sub(
# External link: [http(s)://url display].
# Group 1 is the URL (a run of non-whitespace characters); group 2 is the
# display text. Group 2 is greedy, so it extends to the LAST "]" on the line:
# display text that itself contains brackets is captured whole.
# NOTE(review): for the same reason, two external links on one line are
# matched as a single link — confirm that input never occurs.
EXTERNAL_LINK_PATTERN = re.compile(r'\[(https?://\S+)\s+(.*)\]')
        r'\[\[(.*?)(?:\|(.*?))?\]\]',
# One rating line, matched against the whole line: "<author>: <description>. <rating>".
# Group 1 = author (any characters except ":"), group 2 = description
# (greedy), group 3 = rating text made only of digits and dots. A period
# followed by whitespace must separate the description from the rating.
TALK_PAGE_PATTERN = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')
         lambda m: m.group(2) if m.group(2) else m.group(1),
 
         wiki_text
# Constants for input validation
    )
# Upper limits (in characters) on the author and description parts of a
# talk-page rating line; longer values are not counted as ratings.
MAX_AUTHOR_LENGTH = 40
MAX_DESCRIPTION_LENGTH = 100
# Inclusive (lowest, highest) bounds for an accepted rating value.
VALID_RATING_RANGE = (0, 5)
WIKITABLE_HEADER = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
|+Xenharmonic works sorted by community ranking. The "R" column is the rating of the work, \
and the "#" column is the number of ratings given to that work.
! Creator !! Work !! Tuning !! Notes !! R !! #
 
'''
 
# Fallback row fields for a rated work that has no row in the main list.
# The keys mirror the per-row dicts built from the main table; 'link' is
# overridden with the work title where this default is used.
DEFAULT_ENTRY = {
    'tuning': '',
    'notes': '',
    'popularity': '0',
    'link': ''
}
 
def format_link(text: str) -> str:
    """Apply special formatting to each link in the string individually."""
    text = remove_formatting(text)
     # Process internal links to underline display text
    def replace_internal(match):
        content = match.group(1)
        parts = content.split('|', 1)
        if len(parts) == 1:
            target = parts[0].strip()
            #return f'[[{target}|<u>{target}</u>]]'
            return f'<u>[[{target}]]</u>'
        else:
            target, display = parts
            target = target.strip()
            display = display.strip()
            return f'<u>[[{target}|{display}]]</u>'
   
     processed_text = re.sub(r'\[\[(.*?)\]\]', replace_internal, text, flags=re.DOTALL)
   
    # Process external links according to rules
    def replace_external(match):
         content = match.group(1)
        parts = content.split(' ', 1)
        url_part = parts[0].strip().lower()
        reconstructed = parts[0].strip() + (f' {parts[1].strip()}' if len(parts) > 1 else '')
          
        if 'bandcamp' in url_part:
            return f'[{content}]'
        elif 'youtube' in url_part:
            return f"''[{reconstructed}]''"
        else:
            return f'[{url_part} <nowiki>[{parts[1].strip()}]</nowiki>]'
      
      
    # Process external links: [URL Display] → Display (remove if no Display)
     processed_text = re.sub(r'(?<!\[)\[(?!\[)(.*?)\]', replace_external, processed_text, flags=re.DOTALL)
     processed_text = re.sub(
        r'\[(https?://\S+)(?:\s+([^\]]+))?\]',
        lambda m: m.group(2) if m.group(2) else '',
        processed_text
    )
      
      
     return processed_text
     return processed_text


def process_talk(url):
def process_wiki_links(text: str) -> str:
     #pattern= r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$'
    """Process wiki links and external links to extract display text."""
     pattern = r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$'
    # Process internal wiki links
     data = {}
    text = WIKI_LINK_PATTERN.sub(lambda m: m.group(2) or m.group(1), text)
     # Process external links to extract display text
    text = EXTERNAL_LINK_PATTERN.sub(
        lambda m: m.group(2).strip() if m.group(2) else '',
        text
    )
    return text
 
def remove_formatting(text: str) -> str:
    """Strip inline wiki/HTML markup from *text* and trim outer whitespace.

    The substitutions run in this order:

    1. ``<nowiki>[content]</nowiki>`` becomes ``content`` (the tags and the
       square brackets are dropped together).
    2. Any remaining bare tag of the form ``<name>`` or ``</name>`` is
       deleted, keeping the text between the tags. Only lowercase names
       without attributes match, so e.g. ``<span style="...">`` is left alone.
    3. Bold (``'''x'''``), italic (``''x''``) and ``__x__`` wrappers are
       unwrapped, keeping the inner text.

    Returns the cleaned string with leading/trailing whitespace removed.
    """
    # The nowiki pass must run before the generic tag pass; otherwise the tags
    # would already be gone and the brackets inside them would be kept.
    text = re.sub(r'<nowiki>\[(.*?)\]</nowiki>', r'\1', text, flags=re.DOTALL)
    # Remove any remaining simple HTML-style tags.
    text = re.sub(r'<\/?[a-z]+>', '', text)
    # Bold is handled before italic so that ''' is not consumed as '' + '.
    text = re.sub(r"'''(.*?)'''", r'\1', text)
    text = re.sub(r"''(.*?)''", r'\1', text)
    text = re.sub(r"__(.*?)__", r'\1', text)
    return text.strip()


"""TALK"""
def process_talk_page(url: str) -> dict:
    """Process talk page entries to calculate community ratings.
    Returns a dict from (author, work) to its list of ratings."""
    ratings = {}
     try:
     try:
         response = requests.get(url)
         response = requests.get(url)
         response.raise_for_status() # Raise exception for HTTP errors
         response.raise_for_status()
       
        # Parse the edit page content to find the wikitext
         soup = BeautifulSoup(response.text, 'html.parser')
         soup = BeautifulSoup(response.text, 'html.parser')
         textarea = soup.find('textarea', {'id': 'wpTextbox1'})
         if not (textarea := soup.find('textarea', {'id': 'wpTextbox1'})):
       
             print("Error: Could not find talk page content")
        if not textarea:
             print("Could not find document content in the page")
             return {}
             return {}
           
        lines = textarea.text.split('\n')


         for line in lines:
         for line in textarea.text.split('\n'):
            line = line.strip()
             if match := TALK_PAGE_PATTERN.match(line.strip()):
             match = re.match(pattern, line)
                author, description, rating_str = match.groups()
            if not match:
                if len(author) <= MAX_AUTHOR_LENGTH and \
                continue
                  len(description) <= MAX_DESCRIPTION_LENGTH:
           
                    process_rating(author, description, rating_str, ratings)
            thing_a = match.group(1)
            thing_b = match.group(2)
            number_str = match.group(3)
           
            if len(thing_a) == 0 or len(thing_b) == 0:
                continue
               
            if len(thing_a) > 40 or len(thing_b) > 100:
                continue


            try:
                number = float(number_str)
                if not (0 <= number <= 5):
                    continue
            except ValueError:
                continue
            key = (thing_a, thing_b)
            value = data.setdefault(key, ([0, 5], 0))
            data[key] = (value[0]+[number], value[1]+1)
           
   
     except requests.RequestException as e:
     except requests.RequestException as e:
         print(f"Error fetching URL: {e}")
         print(f"Network error: {e}")
        return {}
    return ratings


     # Calculate averages
def process_rating(author: str, desc: str, rating_str: str, ratings: dict):
     return {key: (sum(nums)/len(nums), pop) for key, (nums, pop) in data.items()}
     """Process and validate a single rating entry."""
     try:
        rating = float(rating_str)
        if VALID_RATING_RANGE[0] <= rating <= VALID_RATING_RANGE[1]:
            key = (author, desc)
            ratings.setdefault(key, []).append(rating)
    except ValueError:
        pass


def process_main(url): # i'm sorry you have to deal with this code
def process_main_page(url: str) -> tuple:
    """Process main list page entries."""
     try:
     try:
        # Fetch the page content
         response = requests.get(url)
         response = requests.get(url)
         response.raise_for_status() # Ensure the request was successful
         response.raise_for_status()
   
        # Parse the HTML to get the raw wikitext from the textarea
         soup = BeautifulSoup(response.text, 'html.parser')
         soup = BeautifulSoup(response.text, 'html.parser')
         textarea = soup.find('textarea', id='wpTextbox1')
         if not (textarea := soup.find('textarea', id='wpTextbox1')):
        if not textarea:
             print("Error: Could not find main page content")
             print("Could not find document content in the page")
             return {}, '', ''
             return {}


         raw_text = textarea.get_text()
         raw_text = textarea.get_text()
        before, table_section, after = parse_table_sections(raw_text)
        return process_table_entries(table_section), before, update_last_modified(after)


        (before, after) = raw_text.split('{|', 1)
    except (requests.RequestException, ValueError) as e:
         (raw_text, after) = after.split('|}', 1)
        print(f"Error: {e}")
        if after[0] == "\n":
         return {}, '', ''
            after = after[1:]


         # add "last updated by"
def parse_table_sections(text: str) -> tuple:
         index = after.find('\n')
    """Split text into sections around the wikitable."""
         first_part = after[:index + 1]
    try:
         rest_part = after[index + 1:]
         # Split into parts before and after the table
         before_table, table_plus_after = text.split('{|', 1)
         table_content, after_table = table_plus_after.split('|}', 1)
        return before_table, table_content.strip(), after_table
    except ValueError as e:
         print(f"Table parsing error: {e}")
        return "", "", text


        if first_part.startswith("Last updated by"):
def update_last_modified(content: str) -> str:
            new_first = "\nLast updated by ~~~~.\n"
    print(content)
            after = new_first + rest_part
    """Update last modified timestamp."""
         else:
    return (
            after = "\nLast updated by ~~~~.\n" + after
        "\nLast updated by ~~~~.\n" + content[content.find("Last updated by"):].split('\n', 1)[1]
        if content.lstrip().startswith("Last updated by")
         else "\nLast updated by ~~~~.\n" + content
    )


        # Preprocess the text: replace newline followed by | with ||, then split into entries
def process_table_entries(table_section: str) -> dict:
        processed_text = raw_text.replace('\n|', '||')
    """Process table entries into structured data."""
         entries = re.split(r'\|\|-\s*', processed_text)
    entries = {}
    for entry in re.split(r'\|\|-\s*', table_section.replace('\n|', '||')):
         if entry.strip().startswith('|'):
            process_single_entry(entry, entries)
    return entries


         result = {}
def process_single_entry(entry: str, entries: dict) -> None:
    """Parse one table row and merge it into *entries*.

    *entry* is a single row whose cells are separated by ``||`` (leading
    ``|`` characters are ignored). Cells are read positionally: creator,
    work link, tuning, notes, then a fifth cell that is not used, and the
    sixth cell, which is stored under ``'popularity'``.

    Rows with fewer than six cells, or whose last cell is a lone ``}``, are
    ignored. Otherwise the row is passed to ``update_entries`` keyed by the
    creator and the plain-text title derived from the work link; *entries* is
    modified in place and nothing is returned.
    """
    parts = [p.strip() for p in re.split(r'\s*\|\|\s*', entry.lstrip('|'))]
    if len(parts) < 6 or parts[-1] == "}":
        return

    author = parts[0]
    work_link = parts[1]
    entry_data = {
        'tuning': parts[2],
        'notes': parts[3],
        'popularity': parts[5],
        'link': work_link
    }
    # The lookup key uses the display title (links resolved, markup removed),
    # while the original link markup is kept in entry_data['link'].
    work_title = remove_formatting(process_wiki_links(work_link))
    update_entries(author, work_title, entry_data, entries)


        for entry in entries:
def update_entries(author: str, work: str, new_entry: dict, entries: dict):
            entry = entry.strip()
    """Update entries with conflict resolution."""
            if not entry.startswith('|'):
    norm_key = (author.lower(), work.lower())
                continue
    existing = next((k for k in entries if (k[0].lower(), k[1].lower()) == norm_key), None)
      
      
             # Split the entry into parts, considering possible spaces around ||
    if existing:
             parts = re.split(r'\s*\|\|\s*', entry)
        if should_replace(existing, new_entry, entries):
             del entries[existing]
             entries[(author, work)] = new_entry
    else:
        entries[(author, work)] = new_entry


            if parts[-1] == "}":
def should_replace(existing_key: tuple, new_entry: dict, entries: dict) -> bool:
                parts = parts[:-1] # sorry
    """Determine if new entry should replace existing one."""
    try:
        current_pop = int(entries[existing_key]['popularity'])
        new_pop = int(new_entry['popularity'])
        return new_pop > current_pop
    except (KeyError, ValueError):
        return True


            if parts[0] == "" and len(parts) == 7:
def build_output(before: str, after: str, averages: dict, main_data: dict) -> str:
                parts = parts[1:]
    """Build final output using rating lists directly."""
            else:
    output = [before, WIKITABLE_HEADER]
                print("AN UNEXPECTED THING HAPPENED.")
 
                print("CHECK THAT THE TABLE LOOKS NORMAL.")
    sorted_items = sorted(
                print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
        averages.items(), # a tuple pair ((author, work), [list_of_ratings])
                print(parts)
        key=lambda x: (-(sum(x[1])+5) / (len(x[1])+2) ), # Descending by average
            if len(parts) < 6:
    )
                continue # Not enough columns
      
      
            # Extract AUTHOR from the first part
    for (author, work), ratings in sorted_items:
            author = parts[0].lstrip('|').strip()
        avg = sum(ratings) / len(ratings)
            if not author:
        count = len(ratings)
                continue
        entry = main_data.get((author, work), {**DEFAULT_ENTRY, 'link': work})
           
        output.append(format_row(author, entry, avg, count))
            # Split the second part into LINK and NAME
            print(parts)
            link = parts[1].strip()
            name = extract_display_text(link)
            print("LINK " + link + ". NAME: " + name) ##################
      
      
            tuning = parts[2].strip()
    # Add unrated entries
            notes = parts[3].strip()
    for (author, work) in set(main_data) - set(averages):
            pop = parts[5].strip()
        entry = main_data[(author, work)]
           
        output.append(format_row(author, entry))
            # Add to the result dictionary
   
            if (author, name) in result:
    return ''.join(output + ["|}\n", after])
                old_pop = result[(author, name)][2]
               
                try:
                    old_pop = int(old_pop)
                except:
                    result[(author, name)] = (tuning, notes, pop, link)
       
                try:
                    pop = int(pop)
                except: # idk what this error is
                    continue


                '''
def format_row(author: str, entry: dict, rating: float = None, count: int = None) -> str:
                if old_pop < new_pop:
    """Format row using calculated values when available."""
                    result[(author, name)] = (tuning, notes, pop, link)
    rating_str = f"{rating:.2f}" if rating is not None else ""
                else:
    count_str = str(count) if count is not None else ""
                    continue
     return (f"|-\n| {author} || {format_link(entry['link'])} || {entry['tuning']} || "
                '''
            f"{entry['notes']} || {rating_str} || {count_str}\n")
            else:
                result[(author, name)] = (tuning, notes, pop, link)
     except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}
 
    return result, before, after
       


def main():
    """Main processing workflow."""
    talk_url = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
    main_url = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"


    averages = process_talk_page(talk_url)
    main_data, before, after = process_main_page(main_url)
   
    if main_data:
        pyperclip.copy(build_output(before, after, averages, main_data))
        print("Results copied to clipboard")


talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
    n = "Namoic: [https://benyamind.bandcamp.com/track/chromacro-17-edo <nowiki>[Chromacro]</nowiki>]"
main = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"
    print(n + '\n' + format_link(n) + '\n')
averages = process_talk(talk)
data, before, after = process_main(main)


clip = before
    n = "[https://soundcloud.com/jollybard/pop-song <nowiki>[pop song]</nowiki>]"
 
     print(n + '\n' + format_link(n) + '\n')
clip += '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
|-
! Creator !! Work !! Tuning !! Notes !! Rating !! {{nowrap|Number of ratings}}
 
'''
 
# print from talk page
for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", work))
     clip += (f"|-\n| {author} || {link} || {tuning} || {notes} || {rating:.2f} || {pop}\n")
 
# print main pages not in talk page
for i in data:
    if (i[0], i[1]) not in averages:
        (author, work) = i
        tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
        clip += (f"|-\n| {author} || {link} || {tuning} || {notes} || || \n")
 
       
clip += "|}"


clip += after
    n = "Harmony Hacker: <u>[[Gleam]]</u>"
pyperclip.copy(clip)
    print(n + '\n' + format_link(n) + '\n')


</syntaxhighlight>
if __name__ == "__main__":
    main()
</syntaxhighlight>