@@ Line 1: / Line 1: @@
-<syntaxhighlight lang="python" line="">
+Ask hkm for the program if you're interested in helping out with the project!
-"""
-Wiki Rating Processor
-This script processes community ratings from a talk page and merges them with main list entries
-from a xenharmonic wiki. Results are formatted into a sorted wikitable and copied to clipboard.
-Disclaimer: a lot of this code is AI-generated, because I don't have that much time on my hands.
-"""
-import re
-import requests
-from bs4 import BeautifulSoup
-import pyperclip
-# Regular expressions precompiled for better performance
-# Internal wiki link: [[target]] or [[target|display]].
-# Group 1 is the target, group 2 the optional display text.
-WIKI_LINK_PATTERN = re.compile(r'\[\[(.*?)(?:\|(.*?))?\]\]')
-# External link: [http(s)://url label]. Group 1 is the URL, group 2 the label.
-# The label group is greedy, so it runs to the last "]" on the line.
-EXTERNAL_LINK_PATTERN = re.compile(r'\[(https?://\S+)\s+(.*)\]')
-# Talk page rating line of the form "author: description. rating".
-# Group 1 is the text before the first colon, group 2 the description and
-# group 3 a run of digits/dots that process_rating later parses as a float.
-TALK_PAGE_PATTERN = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')
-# Constants for input validation
-# process_talk_page ignores rating lines whose author or description is longer than these.
-MAX_AUTHOR_LENGTH = 40
-MAX_DESCRIPTION_LENGTH = 100
-# Inclusive (lowest, highest) bounds checked by process_rating.
-VALID_RATING_RANGE = (0, 5)
-# Opening of the generated wikitable: table start, caption and column headers.
-WIKITABLE_HEADER = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
-|+Xenharmonic works sorted by community ranking. The "R" column is the rating of the work, \
-and the "#" column is the number of ratings given to that work.
-! Creator !! Work !! Tuning !! Notes !! R !! #
-'''
-# Fallback row data used by build_output for a rated work that has no main list entry.
-DEFAULT_ENTRY = {
-    'tuning': '',
-    'notes': '',
-    'popularity': '0',
-    'link': ''
-}
-def format_link(text: str) -> str:
-    """Re-apply the list's link styling to every link in ``text``.
-    Existing formatting is first stripped with ``remove_formatting``. Then:
-    * internal links (``[[target]]`` / ``[[target|display]]``) are wrapped in ``<u>...</u>``;
-    * external links whose URL contains "bandcamp" are emitted unchanged;
-    * external links whose URL contains "youtube" are wrapped in ``''...''``;
-    * any other external link gets its label wrapped in ``<nowiki>[...]</nowiki>``.
-    Bracketed text with no label (no space after the first word) is emitted as a
-    bare ``[content]`` instead of raising. URLs keep their original letter case.
-    Args:
-        text: Wikitext of one table cell.
-    Returns:
-        The cell text with restyled links.
-    """
-    text = remove_formatting(text)
-    def replace_internal(match):
-        # Split "target|display" once; a link without a pipe has no display text.
-        target, pipe, display = match.group(1).partition('|')
-        target = target.strip()
-        if not pipe:
-            return f'<u>[[{target}]]</u>'
-        return f'<u>[[{target}|{display.strip()}]]</u>'
-    processed_text = re.sub(r'\[\[(.*?)\]\]', replace_internal, text, flags=re.DOTALL)
-    def replace_external(match):
-        content = match.group(1)
-        url, space, label = content.partition(' ')
-        url = url.strip()
-        label = label.strip()
-        # Lower-case only for the host checks; the emitted URL keeps its case
-        # because URL paths can be case-sensitive.
-        url_lower = url.lower()
-        if 'bandcamp' in url_lower:
-            return f'[{content}]'
-        if 'youtube' in url_lower:
-            reconstructed = url + (f' {label}' if space else '')
-            return f"''[{reconstructed}]''"
-        if not label:
-            # No label to protect with <nowiki>; leave a bare bracketed item.
-            return f'[{url}]'
-        return f'[{url} <nowiki>[{label}]</nowiki>]'
-    # Single brackets only: the lookarounds skip the "[[" / "]]" of internal links.
-    processed_text = re.sub(r'(?<!\[)\[(?!\[)(.*?)\]', replace_external, processed_text, flags=re.DOTALL)
-    return processed_text
-def process_wiki_links(text: str) -> str:
-    """Collapse every wiki or external link in ``text`` to its visible label.
-    Internal links become their display text (or the target when there is no
-    pipe); external links become their stripped label.
-    """
-    def internal_label(found):
-        # Prefer the piped display text; fall back to the link target.
-        return found.group(2) or found.group(1)
-    def external_label(found):
-        label = found.group(2)
-        return label.strip() if label else ''
-    without_internal = WIKI_LINK_PATTERN.sub(internal_label, text)
-    return EXTERNAL_LINK_PATTERN.sub(external_label, without_internal)
-def remove_formatting(text: str) -> str:
-    """Strip <nowiki>[...]</nowiki> wrappers, simple HTML tags and emphasis markup.
-    Returns the cleaned text with surrounding whitespace removed.
-    """
-    # Ordered (pattern, replacement, flags) steps. The <nowiki> wrapper must be
-    # unwrapped before the generic tag removal, and ''' before ''.
-    steps = (
-        (r'<nowiki>\[(.*?)\]</nowiki>', r'\1', re.DOTALL),
-        (r'<\/?[a-z]+>', '', 0),
-        (r"'''(.*?)'''", r'\1', 0),
-        (r"''(.*?)''", r'\1', 0),
-        (r"__(.*?)__", r'\1', 0),
-    )
-    for pattern, replacement, flags in steps:
-        text = re.sub(pattern, replacement, text, flags=flags)
-    return text.strip()
-"""TALK"""
-def process_talk_page(url: str) -> dict:
-    """Collect community ratings from the talk page's edit view.
-    Each line of the edit textarea that matches ``TALK_PAGE_PATTERN`` and whose
-    author and description are within the length limits is handed to
-    ``process_rating``.
-    Args:
-        url: Address of the talk page in edit mode (``action=edit``).
-    Returns:
-        A dict mapping ``(author, work)`` to the list of ratings found. It is
-        empty when the textarea is missing or the request fails.
-    """
-    ratings = {}
-    try:
-        # A timeout keeps the script from hanging forever on a stalled connection;
-        # requests raises a RequestException subclass when it expires.
-        response = requests.get(url, timeout=30)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-        if not (textarea := soup.find('textarea', {'id': 'wpTextbox1'})):
-            print("Error: Could not find talk page content")
-            return {}
-        for line in textarea.text.split('\n'):
-            if match := TALK_PAGE_PATTERN.match(line.strip()):
-                author, description, rating_str = match.groups()
-                if len(author) <= MAX_AUTHOR_LENGTH and \
-                   len(description) <= MAX_DESCRIPTION_LENGTH:
-                    process_rating(author, description, rating_str, ratings)
-    except requests.RequestException as e:
-        print(f"Network error: {e}")
-    return ratings
-def process_rating(author: str, desc: str, rating_str: str, ratings: dict):
-    """Parse one rating and, if it is in range, append it to ``ratings`` in place.
-    Ratings are grouped under the ``(author, desc)`` key. Unparseable or
-    out-of-range values are ignored.
-    """
-    try:
-        value = float(rating_str)
-    except ValueError:
-        # Malformed numbers are skipped on purpose rather than aborting the run.
-        return
-    if not (VALID_RATING_RANGE[0] <= value <= VALID_RATING_RANGE[1]):
-        return
-    ratings.setdefault((author, desc), []).append(value)
-def process_main_page(url: str) -> tuple:
-    """Fetch the main list page and split it around its wikitable.
-    Args:
-        url: Address of the main list page in edit mode (``action=edit``).
-    Returns:
-        ``(entries, before, after)``: the parsed table entries, the text before
-        the table, and the text after it passed through ``update_last_modified``.
-        ``({}, '', '')`` is returned when the textarea is missing or an error occurs.
-    """
-    try:
-        # A timeout keeps the script from hanging forever on a stalled connection;
-        # requests raises a RequestException subclass when it expires.
-        response = requests.get(url, timeout=30)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-        if not (textarea := soup.find('textarea', id='wpTextbox1')):
-            print("Error: Could not find main page content")
-            return {}, '', ''
-        raw_text = textarea.get_text()
-        before, table_section, after = parse_table_sections(raw_text)
-        return process_table_entries(table_section), before, update_last_modified(after)
-    except (requests.RequestException, ValueError) as e:
-        print(f"Error: {e}")
-        return {}, '', ''
-def parse_table_sections(text: str) -> tuple:
-    """Cut ``text`` into (before table, table body, after table).
-    The table is the span between the first ``{|`` and the first ``|}`` after
-    it; the body is returned stripped. When either marker is missing, a message
-    is printed and ``("", "", text)`` is returned.
-    """
-    try:
-        head, remainder = text.split('{|', 1)
-        body, tail = remainder.split('|}', 1)
-    except ValueError as e:
-        # Unpacking fails when a marker is absent and split yields a single piece.
-        print(f"Table parsing error: {e}")
-        return "", "", text
-    return head, body.strip(), tail
-def update_last_modified(content: str) -> str:
-    """Return ``content`` with a fresh "Last updated by ~~~~." line at the top.
-    If ``content`` (ignoring leading whitespace) already starts with a
-    "Last updated by" line, that line and the whitespace before it are replaced;
-    otherwise the new line is prepended to the unchanged content.
-    Args:
-        content: Page text that follows the table.
-    Returns:
-        The text with the refreshed stamp line.
-    """
-    stamp = "\nLast updated by ~~~~.\n"
-    stripped = content.lstrip()
-    if stripped.startswith("Last updated by"):
-        # partition yields an empty remainder when the old stamp is the final
-        # line without a newline, where indexing a split result would fail.
-        _, _, remainder = stripped.partition('\n')
-        return stamp + remainder
-    return stamp + content
-def process_table_entries(table_section: str) -> dict:
-    """Turn the wikitable body into a dict of entries.
-    Cell breaks written as a newline followed by ``|`` are folded into ``||``,
-    the text is split on the row separator, and every chunk that starts with
-    ``|`` is passed to ``process_single_entry``.
-    """
-    entries = {}
-    flattened = table_section.replace('\n|', '||')
-    rows = re.split(r'\|\|-\s*', flattened)
-    for row in rows:
-        if not row.strip().startswith('|'):
-            continue
-        process_single_entry(row, entries)
-    return entries
-def process_single_entry(entry: str, entries: dict):
-    """Parse one table row and merge it into ``entries`` in place.
-    Rows with fewer than six cells, or whose last cell is ``}``, are ignored.
-    The entry is keyed by the author and the work's plain display text.
-    """
-    cells = [cell.strip() for cell in re.split(r'\s*\|\|\s*', entry.lstrip('|'))]
-    if len(cells) < 6 or cells[-1] == "}":
-        return
-    author, work_link, tuning, notes = cells[:4]
-    row_data = {
-        'tuning': tuning,
-        'notes': notes,
-        # The sixth cell (the "#" column in the header) is stored as popularity.
-        'popularity': cells[5],
-        'link': work_link
-    }
-    display_name = remove_formatting(process_wiki_links(work_link))
-    update_entries(author, display_name, row_data, entries)
-def update_entries(author: str, work: str, new_entry: dict, entries: dict):
-    """Insert ``new_entry`` into ``entries``, resolving case-insensitive duplicates.
-    When a key equal to ``(author, work)`` ignoring case already exists, it is
-    replaced only if ``should_replace`` says so; otherwise the entry is added.
-    """
-    wanted = (author.lower(), work.lower())
-    duplicate = None
-    for key in entries:
-        if (key[0].lower(), key[1].lower()) == wanted:
-            duplicate = key
-            break
-    if duplicate is not None:
-        if not should_replace(duplicate, new_entry, entries):
-            return
-        # Drop the old spelling so only the new key remains.
-        del entries[duplicate]
-    entries[(author, work)] = new_entry
-def should_replace(existing_key: tuple, new_entry: dict, entries: dict) -> bool:
-    """Return True when ``new_entry`` should take the place of the stored entry.
-    The new entry wins when its integer popularity is strictly higher, or when
-    either popularity is missing or not an integer.
-    """
-    try:
-        stored = int(entries[existing_key]['popularity'])
-        candidate = int(new_entry['popularity'])
-    except (KeyError, ValueError):
-        return True
-    return candidate > stored
-def build_output(before: str, after: str, averages: dict, main_data: dict) -> str:
-    """Assemble the full page text: intro, rating table, trailing text.
-    Rated works come first, ordered by a smoothed score
-    ``(sum + 5) / (count + 2)`` (highest first), so a work with few ratings is
-    pulled towards 2.5. The "R" cell still shows the plain mean. Works from
-    ``main_data`` without ratings follow in ``main_data`` order, so the output
-    is the same from run to run.
-    Args:
-        before: Page text preceding the table.
-        after: Page text following the table.
-        averages: Maps ``(author, work)`` to that work's list of ratings.
-        main_data: Maps ``(author, work)`` to its main list entry dict.
-    Returns:
-        The complete wikitext.
-    """
-    output = [before, WIKITABLE_HEADER]
-    # An empty rating list has no mean, so such a work is treated as unrated.
-    rated = {key: ratings for key, ratings in averages.items() if ratings}
-    sorted_items = sorted(
-        rated.items(),  # pairs of ((author, work), [list_of_ratings])
-        key=lambda x: -(sum(x[1]) + 5) / (len(x[1]) + 2),  # descending smoothed score
-    )
-    for (author, work), ratings in sorted_items:
-        avg = sum(ratings) / len(ratings)
-        count = len(ratings)
-        entry = main_data.get((author, work), {**DEFAULT_ENTRY, 'link': work})
-        output.append(format_row(author, entry, avg, count))
-    # Add unrated entries in main list order; iterating a set difference would
-    # shuffle these rows between runs.
-    for (author, work), entry in main_data.items():
-        if (author, work) not in rated:
-            output.append(format_row(author, entry))
-    return ''.join(output + ["|}\n", after])
-def format_row(author: str, entry: dict, rating: float = None, count: int = None) -> str:
-    """Render one wikitable row; rating and count cells are blank when not given.
-    The rating is shown with two decimals and the work link is passed through
-    ``format_link``.
-    """
-    rating_str = "" if rating is None else f"{rating:.2f}"
-    count_str = "" if count is None else str(count)
-    link = format_link(entry['link'])
-    tuning = entry['tuning']
-    notes = entry['notes']
-    return f"|-\n| {author} || {link} || {tuning} || {notes} || {rating_str} || {count_str}\n"
-def main():
-    """Fetch both wiki pages, build the merged table and copy it to the clipboard.
-    Nothing is copied when no entries could be read from the main list page.
-    """
-    talk_url = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
-    main_url = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"
-    averages = process_talk_page(talk_url)
-    main_data, before, after = process_main_page(main_url)
-    if not main_data:
-        # Leave the clipboard untouched rather than overwrite it with an empty table.
-        print("No entries found on the main page; nothing copied")
-        return
-    pyperclip.copy(build_output(before, after, averages, main_data))
-    print("Results copied to clipboard")
-if __name__ == "__main__":
-    main()
-</syntaxhighlight>

User:Hkm/Rankings program: Difference between revisions