User:Hkm/Rankings program: Difference between revisions

Line 1:

"""

Wiki Rating Processor

This script processes community ratings from a talk page and merges them with main list entries

from a xenharmonic wiki. Results are formatted into a sorted wikitable and copied to clipboard.

disclaimer: a lot of this is ai code, because I don't have this much time on my hands

"""

~~'''when adding a new work, the user should put it at the bottom~~

~~so that it does not replace the tuning and notes if that work was already there'''~~

import re

import requests

Line 8:

Line 14:

import pyperclip

def ~~extract_display_text~~(~~wiki_text~~):

# Regular expressions precompiled for better performance

# Process internal links: [[~~Page~~|~~Display~~]] ~~→ Display~~ (~~or Page if no Display~~)

WIKI_LINK_PATTERN = re.compile(r'\[\[(.*?)(?:\|(.*?))?\]\]')

processed_text = re.sub(

EXTERNAL_LINK_PATTERN = re.compile(r'\[(https?://\S+)\s+(.*)\]')

r'\[\[(.*?)~~(?:\|(.*?))?~~\]\]',

TALK_PAGE_PATTERN = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')

~~lambda m: m~~.group(2) ~~if m~~.~~group~~(2) ~~else m~~.~~group~~(1),

~~wiki_text~~

# Constants for input validation

)

MAX_AUTHOR_LENGTH = 40

MAX_DESCRIPTION_LENGTH = 100

VALID_RATING_RANGE = (0, 5)

WIKITABLE_HEADER = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"

|+Xenharmonic works sorted by community ranking. The "R" column is the rating of the work, \

and the "#" column is the number of ratings given to that work.

! Creator !! Work !! Tuning !! Notes !! R !! #

'''

DEFAULT_ENTRY = {

'tuning': '',

'notes': '',

'popularity': '0',

'link': ''

}

def format_link(text: str) -> str:

"""Apply special formatting to each link in the string individually."""

text = remove_formatting(text)

# Process internal links to underline display text

def replace_internal(match):

content = match.group(1)

parts = content.split('|', 1)

if len(parts) == 1:

target = parts[0].strip()

#return f'[[{target}|{target}]]'

return f'[[{target}]]'

else:

target, display = parts

target = target.strip()

display = display.strip()

return f'[[{target}|{display}]]'

processed_text = re.sub(r'\[\[(.*?)\]\]', replace_internal, text, flags=re.DOTALL)

# Process external links according to rules

def replace_external(match):

content = match.group(1)

parts = content.split(' ', 1)

url_part = parts[0].strip().lower()

reconstructed = parts[0].strip() + (f' {parts[1].strip()}' if len(parts) > 1 else '')

if 'bandcamp' in url_part:

return f'[{content}]'

elif 'youtube' in url_part:

return f"''[{reconstructed}]''"

else:

return f'[{url_part} <nowiki>[{parts[1].strip()}]</nowiki>]'

~~# Process external links: [URL Display] → Display (remove if no Display)~~

processed_text = re.sub(r'(?<!\[)\[(?!\[)(.*?)\]', replace_external, processed_text, flags=re.DOTALL)

processed_text = re.sub(

r'\[(~~https~~?~~://~~\S+)(?:\~~s+(~~[~~^\]]+~~))?\]',

~~lambda m: m.group(2) if m.group(2) else ''~~,

processed_text

)

return processed_text

def ~~process_talk~~(~~url~~):

def process_wiki_links(text: str) -> str:

#~~pattern~~= r'^([^:]+):\s+([^.]*)\.\~~s+(~~[0-9.]+)$'

"""Process wiki links and external links to extract display text."""

~~pattern~~ = r'^(~~[^:]+~~):\s+(.*)\.~~\s+~~(~~[0-9~~.]+)$'

# Process internal wiki links

~~data = {}~~

text = WIKI_LINK_PATTERN.sub(lambda m: m.group(2) or m.group(1), text)

# Process external links to extract display text

text = EXTERNAL_LINK_PATTERN.sub(

lambda m: m.group(2).strip() if m.group(2) else '',

text

)

return text

def remove_formatting(text: str) -> str:
    """Strip wiki/HTML presentation markup from ``text``.

    The following are removed, in this order:

    1. ``<nowiki>[content]</nowiki>`` wrappers (only ``content`` is kept).
    2. Any remaining simple HTML tags such as ``<u>``, ``</U>``, ``<br />``
       or ``<h2>``. Tags carrying attributes are intentionally left alone.
    3. Bold (``'''x'''``), italic (``''x''``) and ``__x__`` markers.

    Args:
        text: Raw wikitext fragment.

    Returns:
        The text without the markup above, with surrounding whitespace
        stripped.
    """
    # Unwrap <nowiki>[...]</nowiki> first so the brackets go away together
    # with the tags; DOTALL lets the content span several lines.
    text = re.sub(r'<nowiki>\[(.*?)\]</nowiki>', r'\1', text, flags=re.DOTALL)

    # HTML tag names are case-insensitive and may contain digits or be
    # self-closing, so match more than lowercase-letters-only tags.
    text = re.sub(r'</?[a-z][a-z0-9]*\s*/?>', '', text, flags=re.IGNORECASE)

    # Bold must be handled before italic: ''' would otherwise be consumed
    # as an italic marker plus a stray apostrophe.
    text = re.sub(r"'''(.*?)'''", r'\1', text)
    text = re.sub(r"''(.*?)''", r'\1', text)
    text = re.sub(r"__(.*?)__", r'\1', text)

    return text.strip()

"""TALK"""

def process_talk_page(url: str) -> dict:

"""Process talk page entries to calculate community ratings.

Returns a dict from (author, work) to its list of ratings."""

ratings = {}

try:

response = requests.get(url)

response.raise_for_status() ~~# Raise exception for HTTP errors~~

response.raise_for_status()

~~# Parse the edit page content to find the wikitext~~

soup = BeautifulSoup(response.text, 'html.parser')

textarea = soup.find('textarea', {'id': 'wpTextbox1'})

if not (textarea := soup.find('textarea', {'id': 'wpTextbox1'})):

print("Error: Could not find talk page content")

~~if not textarea~~:

print("Could not find ~~document~~ content ~~in the page~~")

return {}

~~lines = textarea.text.split('\n')~~

for line in ~~lines:~~

for line in textarea.text.split('\n'):

~~line = line~~.~~strip~~()

if match := TALK_PAGE_PATTERN.match(line.strip()):

match = re.match(~~pattern,~~ line)

author, description, rating_str = match.groups()

~~if not match:~~

if len(author) <= MAX_AUTHOR_LENGTH and \

~~continue~~

len(description) <= MAX_DESCRIPTION_LENGTH:

process_rating(author, description, rating_str, ratings)

~~thing_a = match~~.~~group~~(1)

~~thing_b = match.group(2~~)

~~number_str~~ = match.~~group~~(3)

if len(~~thing_a~~) =~~= 0 or~~ len(~~thing_b~~) =~~= 0~~:

~~continue~~

~~if len~~(~~thing_a) > 40 or len(thing_b~~) ~~> 100:~~

~~continue~~

~~try:~~

~~number = float(number_str)~~

~~if not (0 <= number <= 5):~~

~~continue~~

~~except ValueError:~~

~~continue~~

~~key = (thing_a, thing_b)~~

~~value = data.setdefault(key, ([0, 5], 0))~~

~~data[key] = (value[0]+[number], value[1]+1)~~

except requests.RequestException as e:

print(f"~~Error fetching URL~~: {e}")

print(f"Network error: {e}")

return {}

return ratings

~~# Calculate averages~~

def process_rating(author: str, desc: str, rating_str: str, ratings: dict):

~~return {key~~: (~~sum(nums~~)~~/len~~(~~nums)~~, ~~pop~~) ~~for~~ key, ~~(nums, pop~~) ~~in data~~.~~items~~()}

"""Process and validate a single rating entry."""

try:

rating = float(rating_str)

if VALID_RATING_RANGE[0] <= rating <= VALID_RATING_RANGE[1]:

key = (author, desc)

ratings.setdefault(key, []).append(rating)

except ValueError:

pass

def ~~process_main~~(url): ~~# i'm sorry you have to deal with this code~~

def process_main_page(url: str) -> tuple:

"""Process main list page entries."""

try:

~~# Fetch the page content~~

response = requests.get(url)

response.raise_for_status() ~~# Ensure the request was successful~~

response.raise_for_status()

~~# Parse the HTML to get the raw wikitext from the textarea~~

soup = BeautifulSoup(response.text, 'html.parser')

textarea = soup.find('textarea', id='wpTextbox1')

if not (textarea := soup.find('textarea', id='wpTextbox1')):

~~if not textarea~~:

print("Error: Could not find main page content")

print("Could not find ~~document~~ content ~~in the page~~")

return {}, '', ''

return {}

raw_text = textarea.get_text()

before, table_section, after = parse_table_sections(raw_text)

return process_table_entries(table_section), before, update_last_modified(after)

(~~before~~, ~~after~~) ~~= raw_text.split~~('{~~|', 1~~)

except (requests.RequestException, ValueError) as e:

~~(raw_text~~, ~~after) = after.split(~~'|}', 1)

print(f"Error: {e}")

~~if after[0] == "\n":~~

return {}, '', ''

~~after = after[1:]~~

# ~~add "last updated by"~~

def parse_table_sections(text: str) -> tuple:

~~index~~ = ~~after~~.~~find~~('\n')

"""Split text into sections around the wikitable."""

~~first_part~~ = ~~after[~~:~~index + 1]~~

try:

~~rest_part = after[index + 1~~:]

# Split into parts before and after the table

before_table, table_plus_after = text.split('{|', 1)

table_content, after_table = table_plus_after.split('|}', 1)

return before_table, table_content.strip(), after_table

except ValueError as e:

print(f"Table parsing error: {e}")

return "", "", text

~~if first_part~~.~~startswith(~~"~~Last updated by~~"):

def update_last_modified(content: str) -> str:

~~new_first =~~ "\nLast updated by ~~~~.\n"

print(content)

~~after = new_first + rest_part~~

"""Update last modified timestamp."""

else:

return (

~~after =~~ "\nLast updated by ~~~~.\n" + ~~after~~

"\nLast updated by ~~~~.\n" + content[content.find("Last updated by"):].split('\n', 1)[1]

if content.lstrip().startswith("Last updated by")

else "\nLast updated by ~~~~.\n" + content

)

~~# Preprocess the text~~: ~~replace newline followed by | with~~ ||, ~~then split into entries~~

def process_table_entries(table_section: str) -> dict:

~~processed_text = raw_text~~.replace('\n|', '||')

"""Process table entries into structured data."""

~~entries = re~~.~~split~~(r'~~\|\~~|-\s*', ~~processed_text~~)

entries = {}

for entry in re.split(r'\|\|-\s*', table_section.replace('\n|', '||')):

if entry.strip().startswith('|'):

process_single_entry(entry, entries)

return entries

~~result~~ = {}

def process_single_entry(entry: str, entries: dict):
    """Parse one wikitable row and merge it into ``entries``.

    The row text is split into cells on ``||`` (leading pipes are dropped
    first). Rows with fewer than six cells, or whose last cell is a bare
    ``}``, are ignored. Otherwise the row is handed to ``update_entries``
    keyed by the author cell and the plain display text of the work cell.

    Args:
        entry: Text of a single table row, cells separated by ``||``.
        entries: Mapping of ``(author, work)`` to entry dicts; updated in
            place through ``update_entries``.
    """
    cells = [cell.strip() for cell in re.split(r'\s*\|\|\s*', entry.lstrip('|'))]
    if len(cells) < 6 or cells[-1] == "}":
        return

    author, work_link, tuning, notes = cells[:4]
    # cells[4] is not stored; only the sixth cell is kept, as 'popularity'.
    record = {
        'tuning': tuning,
        'notes': notes,
        'popularity': cells[5],
        'link': work_link
    }

    # The lookup key uses the link's display text with markup stripped,
    # while the original link markup is preserved in record['link'].
    display_text = remove_formatting(process_wiki_links(work_link))
    update_entries(author, display_text, record, entries)

~~for entry in~~ entries:

def update_entries(author: str, work: str, new_entry: dict, entries: dict):

~~entry~~ = ~~entry~~.~~strip~~()

"""Update entries with conflict resolution."""

if ~~not entry~~.~~startswith~~(~~'|'~~):

norm_key = (author.lower(), work.lower())

~~continue~~

existing = next((k for k in entries if (k[0].lower(), k[1].lower()) == norm_key), None)

~~# Split the entry into parts, considering possible spaces around ||~~

if existing:

~~parts~~ = ~~re.split~~(~~r'\s*\|\|\s*'~~, ~~entry~~)

if should_replace(existing, new_entry, entries):

del entries[existing]

entries[(author, work)] = new_entry

else:

entries[(author, work)] = new_entry

if ~~parts[-1] ==~~ "}":

def should_replace(existing_key: tuple, new_entry: dict, entries: dict) -> bool:

~~parts~~ = ~~parts~~[:~~-1] # sorry~~

"""Determine if new entry should replace existing one."""

try:

current_pop = int(entries[existing_key]['popularity'])

new_pop = int(new_entry['popularity'])

return new_pop > current_pop

except (KeyError, ValueError):

return True

~~if parts[0] == "" and len~~(~~parts~~) ~~== 7~~:

def build_output(before: str, after: str, averages: dict, main_data: dict) -> str:

~~parts~~ = ~~parts~~[1:]

"""Build final output using rating lists directly."""

~~else:~~

output = [before, WIKITABLE_HEADER]

~~print~~(~~"AN UNEXPECTED THING HAPPENED~~.")

~~print~~(~~"CHECK THAT THE TABLE LOOKS NORMAL."~~)

sorted_items = sorted(

~~print("IF THE TABLE LOOKS NORMAL~~, ~~TELL HKM HIS CODE IS BROKEN."~~)

averages.items(), # a tuple pair ((author, work), [list_of_ratings])

~~print~~(~~parts~~)

key=lambda x: (-(sum(x[1])+5) / (len(x[1])+2) ), # Descending by average

if len(~~parts~~) ~~< 6:~~

)

~~continue~~ # ~~Not enough columns~~

~~# Extract AUTHOR from the first part~~

for (author, work), ratings in sorted_items:

~~author~~ = ~~parts[0].lstrip~~(~~'|'~~)~~.strip~~()

avg = sum(ratings) / len(ratings)

~~if not author:~~

count = len(ratings)

~~continue~~

entry = main_data.get((author, work), {**DEFAULT_ENTRY, 'link': work})

output.append(format_row(author, entry, avg, count))

~~# Split the second part into LINK and NAME~~

~~print~~(~~parts~~)

~~link~~ = ~~parts[1]~~.~~strip~~()

~~name = extract_display_text(~~link)

~~print~~(~~"LINK " + link + ". NAME: " + name~~) ~~##################~~

~~tuning = parts[2].strip()~~

# Add unrated entries

~~notes = parts[3].strip()~~

for (author, work) in set(main_data) - set(averages):

~~pop = parts[5].strip()~~

entry = main_data[(author, work)]

output.append(format_row(author, entry))

# Add ~~to the result dictionary~~

if (author, ~~name~~) in ~~result~~:

return ''.join(output + ["|}\n", after])

~~old_pop~~ = ~~result~~[(author, ~~name~~)~~][2~~]

~~try:~~

~~old_pop = int~~(~~old_pop)~~

~~except:~~

~~result[~~(author, ~~name~~)~~] = (tuning, notes, pop, link~~)

~~try:~~

~~pop = int~~(~~pop~~)

~~except: # idk what this error is~~

~~continue~~

~~'''~~

def format_row(author: str, entry: dict, rating: float = None, count: int = None) -> str:

~~if old_pop < new_pop:~~

"""Format row using calculated values when available."""

~~result[~~(author, ~~name)]~~ = ~~(tuning~~, ~~notes, pop, link~~)

rating_str = f"{rating:.2f}" if rating is not None else ""

~~else~~:

count_str = str(count) if count is not None else ""

~~continue~~

return (f"|-\n| {author} || {format_link(entry['link'])} || {entry['tuning']} || "

~~'''~~

f"{entry['notes']} || {rating_str} || {count_str}\n")

else:

~~result[(author, name)]~~ = (~~tuning, notes, pop, link~~)

~~except requests.RequestException as e:~~

~~print~~(f"~~Error fetching URL:~~ {e}")

~~return~~ {}

~~return result, before, after~~

def main():

"""Main processing workflow."""

talk_url = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"

main_url = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"

averages = process_talk_page(talk_url)

main_data, before, after = process_main_page(main_url)

if main_data:

pyperclip.copy(build_output(before, after, averages, main_data))

print("Results copied to clipboard")

~~talk~~ = "https://en.~~xen.wiki/index~~.~~php?title=User_talk:Hkm~~/~~Rankings&action=edit"~~

n = "Namoic: [https://benyamind.bandcamp.com/track/chromacro-17-edo <nowiki>[Chromacro]</nowiki>]"

~~main = "https:~~//~~en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit~~"

print(n + '\n' + format_link(n) + '\n')

~~averages = process_talk~~(~~talk~~)

~~data, before, after = process_main(main~~)

~~clip = before~~

n = "[https://soundcloud.com/jollybard/pop-song <nowiki>[pop song]</nowiki>]"

print(n + '\n' + format_link(n) + '\n')

~~clip += '''{| class~~="~~wikitable sortable" style="margin~~: ~~auto; max-width: 800px; width: 100%;"~~

|-

~~! Creator !! Work !! Tuning !! Notes !! Rating !! {{nowrap|Number of ratings}}~~

~~'''~~

~~# print from talk page~~

~~for (author, work), (rating,~~ pop~~) in sorted(averages.items(), key=lambda a:~~ -a[1][0]):

~~tuning, notes, _, link = data.setdefault((author, work), ("",~~ "~~", "", work))~~

~~clip +=~~ (~~f"|-\~~n~~| {author} || {link} || {tuning} || {notes} || {rating:.2f} || {pop}~~\n")

~~# print main pages not in talk page~~

~~for i in data:~~

if (~~i[0], i[1]) not in averages:~~

~~(author, work~~) ~~= i~~

~~tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))~~

~~clip~~ +~~= (f"|-~~\n~~| {author} || {link} || {tuning} || {notes} || || \n"~~)

~~clip += "|}"~~

~~clip +~~= ~~after~~

n = "Harmony Hacker: [[Gleam]]"

~~pyperclip.copy~~(~~clip~~)

print(n + '\n' + format_link(n) + '\n')

‎</syntaxhighlight>

if __name__ == "__main__":

main()

</syntaxhighlight>

@@ Line 1: / Line 1: @@
-<syntaxhighlight lang="python" line>
+<syntaxhighlight lang="python" line="">
+"""
+Wiki Rating Processor
+This script processes community ratings from a talk page and merges them with main list entries
+from a xenharmonic wiki. Results are formatted into a sorted wikitable and copied to clipboard.
+disclaimer: a lot of this is ai code, because I don't have this much time on my hands
+"""
-'''when adding a new work, the user should put it at the bottom
-so that it does not replace the tuning and notes if that work was already there'''
 import re
 import requests
@@ Line 8: / Line 14: @@
 import pyperclip
-def extract_display_text(wiki_text):
+# Regular expressions precompiled for better performance
-     # Process internal links: [[Page|Display]] → Display (or Page if no Display)
+WIKI_LINK_PATTERN = re.compile(r'\[\[(.*?)(?:\|(.*?))?\]\]')
-     processed_text = re.sub(
+EXTERNAL_LINK_PATTERN = re.compile(r'\[(https?://\S+)\s+(.*)\]')
-        r'\[\[(.*?)(?:\|(.*?))?\]\]',
+TALK_PAGE_PATTERN = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')
-         lambda m: m.group(2) if m.group(2) else m.group(1),
-         wiki_text
+# Constants for input validation
-    )
+MAX_AUTHOR_LENGTH = 40
+MAX_DESCRIPTION_LENGTH = 100
+VALID_RATING_RANGE = (0, 5)
+WIKITABLE_HEADER = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
+|+Xenharmonic works sorted by community ranking. The "R" column is the rating of the work, \
+and the "#" column is the number of ratings given to that work.
+! Creator !! Work !! Tuning !! Notes !! R !! #
+'''
+DEFAULT_ENTRY = {
+    'tuning': '',
+    'notes': '',
+    'popularity': '0',
+    'link': ''
+}
+def format_link(text: str) -> str:
+    """Apply special formatting to each link in the string individually."""
+    text = remove_formatting(text)
+     # Process internal links to underline display text
+    def replace_internal(match):
+        content = match.group(1)
+        parts = content.split('|', 1)
+        if len(parts) == 1:
+            target = parts[0].strip()
+            #return f'[[{target}|<u>{target}</u>]]'
+            return f'<u>[[{target}]]</u>'
+        else:
+            target, display = parts
+            target = target.strip()
+            display = display.strip()
+            return f'<u>[[{target}|{display}]]</u>'
+     processed_text = re.sub(r'\[\[(.*?)\]\]', replace_internal, text, flags=re.DOTALL)
+    # Process external links according to rules
+    def replace_external(match):
+         content = match.group(1)
+        parts = content.split(' ', 1)
+        url_part = parts[0].strip().lower()
+        reconstructed = parts[0].strip() + (f' {parts[1].strip()}' if len(parts) > 1 else '')
+        if 'bandcamp' in url_part:
+            return f'[{content}]'
+        elif 'youtube' in url_part:
+            return f"''[{reconstructed}]''"
+        else:
+            return f'[{url_part} <nowiki>[{parts[1].strip()}]</nowiki>]'
-    # Process external links: [URL Display] → Display (remove if no Display)
+     processed_text = re.sub(r'(?<!\[)\[(?!\[)(.*?)\]', replace_external, processed_text, flags=re.DOTALL)
-     processed_text = re.sub(
-        r'\[(https?://\S+)(?:\s+([^\]]+))?\]',
-        lambda m: m.group(2) if m.group(2) else '',
-        processed_text
-    )
      return processed_text
-def process_talk(url):
+def process_wiki_links(text: str) -> str:
-     #pattern= r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$'
+    """Process wiki links and external links to extract display text."""
-     pattern = r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$'
+    # Process internal wiki links
-     data = {}
+    text = WIKI_LINK_PATTERN.sub(lambda m: m.group(2) or m.group(1), text)
+     # Process external links to extract display text
+    text = EXTERNAL_LINK_PATTERN.sub(
+        lambda m: m.group(2).strip() if m.group(2) else '',
+        text
+    )
+    return text
+def remove_formatting(text: str) -> str:
+    """Remove wiki formatting, HTML tags, and paired <nowiki> tags."""
+    # Corrected regex to match <nowiki>[content]</nowiki>
+    text = re.sub(r'<nowiki>\[(.*?)\]</nowiki>', r'\1', text, flags=re.DOTALL)
+    # Remove any remaining HTML tags
+    text = re.sub(r'<\/?[a-z]+>', '', text)
+     # Remove bold, italic, and underline
+    text = re.sub(r"'''(.*?)'''", r'\1', text)
+    text = re.sub(r"''(.*?)''", r'\1', text)
+    text = re.sub(r"__(.*?)__", r'\1', text)
+     return text.strip()
+"""TALK"""
+def process_talk_page(url: str) -> dict:
+    """Process talk page entries to calculate community ratings.
+    Returns a dict from (author, work) to its list of ratings."""
+    ratings = {}
      try:
          response = requests.get(url)
-         response.raise_for_status()  # Raise exception for HTTP errors
+         response.raise_for_status()
-        # Parse the edit page content to find the wikitext
          soup = BeautifulSoup(response.text, 'html.parser')
-         textarea = soup.find('textarea', {'id': 'wpTextbox1'})
+         if not (textarea := soup.find('textarea', {'id': 'wpTextbox1'})):
+             print("Error: Could not find talk page content")
-        if not textarea:
-             print("Could not find document content in the page")
              return {}
-        lines = textarea.text.split('\n')
-         for line in lines:
+         for line in textarea.text.split('\n'):
-            line = line.strip()
+             if match := TALK_PAGE_PATTERN.match(line.strip()):
-             match = re.match(pattern, line)
+                author, description, rating_str = match.groups()
-            if not match:
+                if len(author) <= MAX_AUTHOR_LENGTH and \
-                continue
+                   len(description) <= MAX_DESCRIPTION_LENGTH:
+                    process_rating(author, description, rating_str, ratings)
-            thing_a = match.group(1)
-            thing_b = match.group(2)
-            number_str = match.group(3)
-            if len(thing_a) == 0 or len(thing_b) == 0:
-                continue
-            if len(thing_a) > 40 or len(thing_b) > 100:
-                continue
-            try:
-                number = float(number_str)
-                if not (0 <= number <= 5):
-                    continue
-            except ValueError:
-                continue
-            key = (thing_a, thing_b)
-            value = data.setdefault(key, ([0, 5], 0))
-            data[key] = (value[0]+[number], value[1]+1)
      except requests.RequestException as e:
-         print(f"Error fetching URL: {e}")
+         print(f"Network error: {e}")
-        return {}
+    return ratings
-     # Calculate averages
+def process_rating(author: str, desc: str, rating_str: str, ratings: dict):
-     return {key: (sum(nums)/len(nums), pop) for key, (nums, pop) in data.items()}
+     """Process and validate a single rating entry."""
+     try:
+        rating = float(rating_str)
+        if VALID_RATING_RANGE[0] <= rating <= VALID_RATING_RANGE[1]:
+            key = (author, desc)
+            ratings.setdefault(key, []).append(rating)
+    except ValueError:
+        pass
-def process_main(url): # i'm sorry you have to deal with this code
+def process_main_page(url: str) -> tuple:
+    """Process main list page entries."""
      try:
-        # Fetch the page content
          response = requests.get(url)
-         response.raise_for_status()  # Ensure the request was successful
+         response.raise_for_status()
-        # Parse the HTML to get the raw wikitext from the textarea
          soup = BeautifulSoup(response.text, 'html.parser')
-         textarea = soup.find('textarea', id='wpTextbox1')
+         if not (textarea := soup.find('textarea', id='wpTextbox1')):
-        if not textarea:
+             print("Error: Could not find main page content")
-             print("Could not find document content in the page")
+             return {}, '', ''
-             return {}
          raw_text = textarea.get_text()
+        before, table_section, after = parse_table_sections(raw_text)
+        return process_table_entries(table_section), before, update_last_modified(after)
-        (before, after) = raw_text.split('{|', 1)
+    except (requests.RequestException, ValueError) as e:
-         (raw_text, after) = after.split('|}', 1)
+        print(f"Error: {e}")
-        if after[0] == "\n":
+         return {}, '', ''
-            after = after[1:]
-         # add "last updated by"
+def parse_table_sections(text: str) -> tuple:
-         index = after.find('\n')
+    """Split text into sections around the wikitable."""
-         first_part = after[:index + 1]
+    try:
-         rest_part = after[index + 1:]
+         # Split into parts before and after the table
+         before_table, table_plus_after = text.split('{|', 1)
+         table_content, after_table = table_plus_after.split('|}', 1)
+        return before_table, table_content.strip(), after_table
+    except ValueError as e:
+         print(f"Table parsing error: {e}")
+        return "", "", text
-        if first_part.startswith("Last updated by"):
+def update_last_modified(content: str) -> str:
-            new_first = "\nLast updated by ~~~~.\n"
+    print(content)
-            after = new_first + rest_part
+    """Update last modified timestamp."""
-         else:
+    return (
-            after = "\nLast updated by ~~~~.\n" + after
+        "\nLast updated by ~~~~.\n" + content[content.find("Last updated by"):].split('\n', 1)[1]
+        if content.lstrip().startswith("Last updated by")
+         else "\nLast updated by ~~~~.\n" + content
+    )
-        # Preprocess the text: replace newline followed by | with ||, then split into entries
+def process_table_entries(table_section: str) -> dict:
-        processed_text = raw_text.replace('\n|', '||')
+    """Process table entries into structured data."""
-         entries = re.split(r'\|\|-\s*', processed_text)
+    entries = {}
+    for entry in re.split(r'\|\|-\s*', table_section.replace('\n|', '||')):
+         if entry.strip().startswith('|'):
+            process_single_entry(entry, entries)
+    return entries
-         result = {}
+def process_single_entry(entry: str, entries: dict):
+    """Process individual table entry."""
+    parts = [p.strip() for p in re.split(r'\s*\|\|\s*', entry.lstrip('|'))]
+    if len(parts) >= 6 and parts[-1] != "}":
+         author = parts[0]
+        work_link = parts[1]
+        entry_data = {
+            'tuning': parts[2],
+            'notes': parts[3],
+            'popularity': parts[5],
+            'link': work_link
+        }
+        # update_entries(author, extract_display_text(work_link), entry_data, entries)
+        update_entries(author, remove_formatting(process_wiki_links(work_link)),
+                       entry_data, entries)
-        for entry in entries:
+def update_entries(author: str, work: str, new_entry: dict, entries: dict):
-            entry = entry.strip()
+    """Update entries with conflict resolution."""
-            if not entry.startswith('|'):
+    norm_key = (author.lower(), work.lower())
-                continue
+    existing = next((k for k in entries if (k[0].lower(), k[1].lower()) == norm_key), None)
-             # Split the entry into parts, considering possible spaces around ||
+    if existing:
-             parts = re.split(r'\s*\|\|\s*', entry)
+        if should_replace(existing, new_entry, entries):
+             del entries[existing]
+             entries[(author, work)] = new_entry
+    else:
+        entries[(author, work)] = new_entry
-            if parts[-1] == "}":
+def should_replace(existing_key: tuple, new_entry: dict, entries: dict) -> bool:
-                parts = parts[:-1] # sorry
+    """Determine if new entry should replace existing one."""
+    try:
+        current_pop = int(entries[existing_key]['popularity'])
+        new_pop = int(new_entry['popularity'])
+        return new_pop > current_pop
+    except (KeyError, ValueError):
+        return True
-            if parts[0] == "" and len(parts) == 7:
+def build_output(before: str, after: str, averages: dict, main_data: dict) -> str:
-                parts = parts[1:]
+    """Build final output using rating lists directly."""
-            else:
+    output = [before, WIKITABLE_HEADER]
-                print("AN UNEXPECTED THING HAPPENED.")
-                print("CHECK THAT THE TABLE LOOKS NORMAL.")
+    sorted_items = sorted(
-                print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
+        averages.items(), # a tuple pair ((author, work), [list_of_ratings])
-                print(parts)
+        key=lambda x: (-(sum(x[1])+5) / (len(x[1])+2) ),  # Descending by average
-            if len(parts) < 6:
+    )
-                continue  # Not enough columns
-            # Extract AUTHOR from the first part
+    for (author, work), ratings in sorted_items:
-            author = parts[0].lstrip('|').strip()
+        avg = sum(ratings) / len(ratings)
-            if not author:
+        count = len(ratings)
-                continue
+        entry = main_data.get((author, work), {**DEFAULT_ENTRY, 'link': work})
+        output.append(format_row(author, entry, avg, count))
-            # Split the second part into LINK and NAME
-            print(parts)
-            link = parts[1].strip()
-            name = extract_display_text(link)
-            print("LINK " + link + ". NAME: " + name) ##################
-            tuning = parts[2].strip()
+    # Add unrated entries
-            notes = parts[3].strip()
+    for (author, work) in set(main_data) - set(averages):
-            pop = parts[5].strip()
+        entry = main_data[(author, work)]
+        output.append(format_row(author, entry))
-            # Add to the result dictionary
-            if (author, name) in result:
+    return ''.join(output + ["|}\n", after])
-                old_pop = result[(author, name)][2]
-                try:
-                    old_pop = int(old_pop)
-                except:
-                    result[(author, name)] = (tuning, notes, pop, link)
-                try:
-                    pop = int(pop)
-                except: # idk what this error is
-                    continue
-                '''
+def format_row(author: str, entry: dict, rating: float = None, count: int = None) -> str:
-                if old_pop < new_pop:
+    """Format row using calculated values when available."""
-                    result[(author, name)] = (tuning, notes, pop, link)
+    rating_str = f"{rating:.2f}" if rating is not None else ""
-                else:
+    count_str = str(count) if count is not None else ""
-                    continue
+     return (f"|-\n| {author} || {format_link(entry['link'])} || {entry['tuning']} || "
-                '''
+            f"{entry['notes']} || {rating_str} || {count_str}\n")
-            else:
-                result[(author, name)] = (tuning, notes, pop, link)
-     except requests.RequestException as e:
-        print(f"Error fetching URL: {e}")
-        return {}
-    return result, before, after
+def main():
+    """Main processing workflow."""
+    talk_url = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
+    main_url = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"
+    averages = process_talk_page(talk_url)
+    main_data, before, after = process_main_page(main_url)
+    if main_data:
+        pyperclip.copy(build_output(before, after, averages, main_data))
+        print("Results copied to clipboard")
-talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
+    n = "Namoic: [https://benyamind.bandcamp.com/track/chromacro-17-edo <nowiki>[Chromacro]</nowiki>]"
-main = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"
+    print(n + '\n' + format_link(n) + '\n')
-averages = process_talk(talk)
-data, before, after = process_main(main)
-clip = before
+    n = "[https://soundcloud.com/jollybard/pop-song <nowiki>[pop song]</nowiki>]"
+     print(n + '\n' + format_link(n) + '\n')
-clip += '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
-|-
-! Creator !! Work !! Tuning !! Notes !! Rating !! {{nowrap|Number of ratings}}
-'''
-# print from talk page
-for (author, work), (rating, pop) in sorted(averages.items(), key=lambda a: -a[1][0]):
-    tuning, notes, _, link = data.setdefault((author, work), ("", "", "", work))
-     clip += (f"|-\n| {author} || {link} || {tuning} || {notes} || {rating:.2f} || {pop}\n")
-# print main pages not in talk page
-for i in data:
-    if (i[0], i[1]) not in averages:
-        (author, work) = i
-        tuning, notes, _, link = data.setdefault((author, work), ("", "", "", ""))
-        clip += (f"|-\n| {author} || {link} || {tuning} || {notes} || || \n")
-clip += "|}"
-clip += after
+    n = "Harmony Hacker: <u>[[Gleam]]</u>"
-pyperclip.copy(clip)
+    print(n + '\n' + format_link(n) + '\n')
-‎</syntaxhighlight>
+if __name__ == "__main__":
+    main()
+</syntaxhighlight>