User:Hkm/Rankings program: Difference between revisions

Hkm (talk | contribs)
No edit summary
Hkm (talk | contribs)
Replaced content with "Ask hkm for the program if you're interested in helping out with the project!"
Tag: Replaced
 
Line 1: Line 1:
<syntaxhighlight lang="python" line="">
Ask hkm for the program if you're interested in helping out with the project!
"""
Wiki Rating Processor
 
This script processes community ratings from a talk page and merges them with main list entries
from a xenharmonic wiki. Results are formatted into a sorted wikitable and copied to clipboard.
 
Disclaimer: much of this code is AI-generated, because I don't have enough time to write it all by hand.
"""
 
import re
import requests
from bs4 import BeautifulSoup
import pyperclip
 
# Regular expressions precompiled for better performance
# Internal wiki link: [[target]] or [[target|display]].
# Group 1 is the target, group 2 the optional display text.
WIKI_LINK_PATTERN = re.compile(r'\[\[(.*?)(?:\|(.*?))?\]\]')
# External link: [http(s)://url display text]. Group 1 is the URL, group 2 the
# display text; group 2 is greedy, so it runs to the last "]" on the line.
EXTERNAL_LINK_PATTERN = re.compile(r'\[(https?://\S+)\s+(.*)\]')
# Talk page rating line of the form "Author: description. rating".
# Groups: author (no colons), description, rating (digits and dots only).
TALK_PAGE_PATTERN = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')

# Constants for input validation
# Talk page lines whose author or description exceed these lengths are ignored.
MAX_AUTHOR_LENGTH = 40
MAX_DESCRIPTION_LENGTH = 100
# Inclusive (lowest, highest) bounds for an accepted rating.
VALID_RATING_RANGE = (0, 5)
WIKITABLE_HEADER = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
|+Xenharmonic works sorted by community ranking. The "R" column is the rating of the work, \
and the "#" column is the number of ratings given to that work.
! Creator !! Work !! Tuning !! Notes !! R !! #
 
'''
 
# Field values build_output falls back to for a rated work that has no entry in
# the main list; build_output overrides 'link' with the work's name.
DEFAULT_ENTRY = {
    'tuning': '',
    'notes': '',
    'popularity': '0',
    'link': ''
}
 
def format_link(text: str) -> str:
    """Re-apply the table's link styling to each link in ``text`` individually.

    Existing formatting is first stripped with ``remove_formatting``. Then:

    * internal links (``[[target]]`` or ``[[target|display]]``) are wrapped
      in ``<u>...</u>``;
    * external links (single-bracket spans) are styled by URL: a URL
      containing "bandcamp" is left unchanged, one containing "youtube" is
      wrapped in italics, and any other link gets its display text wrapped
      in ``<nowiki>[...]</nowiki>``.

    A single-bracket span without display text (for example a bare
    ``[https://example.org]``) is returned unchanged, and the URL is emitted
    in its original case; only the host check is case-insensitive.

    Args:
        text: Wiki markup for one table cell.

    Returns:
        The markup with every link restyled.
    """
    text = remove_formatting(text)

    def replace_internal(match):
        # Underline the whole link, keeping an explicit display text if given.
        target, pipe, display = match.group(1).partition('|')
        target = target.strip()
        if not pipe:
            return f'<u>[[{target}]]</u>'
        return f'<u>[[{target}|{display.strip()}]]</u>'

    processed_text = re.sub(r'\[\[(.*?)\]\]', replace_internal, text, flags=re.DOTALL)

    def replace_external(match):
        content = match.group(1)
        url, space, label = content.partition(' ')
        url = url.strip()
        label = label.strip()
        # Lower-case only for the host check; URL paths can be case-sensitive.
        url_lower = url.lower()

        if 'bandcamp' in url_lower:
            return f'[{content}]'
        if 'youtube' in url_lower:
            reconstructed = f'{url} {label}' if space else url
            return f"''[{reconstructed}]''"
        if not label:
            # Nothing to wrap in <nowiki>; leave the bracketed text as it was.
            return f'[{content}]'
        return f'[{url} <nowiki>[{label}]</nowiki>]'

    # The lookarounds skip the double brackets of internal links.
    processed_text = re.sub(r'(?<!\[)\[(?!\[)(.*?)\]', replace_external, processed_text, flags=re.DOTALL)

    return processed_text
 
def process_wiki_links(text: str) -> str:
    """Replace every wiki link in ``text`` with the text it displays.

    Internal links become their display text, or their target when no display
    text is given. External links become their stripped display text.
    """
    def internal_label(match):
        target, label = match.group(1), match.group(2)
        return label or target

    def external_label(match):
        return (match.group(2) or '').strip()

    without_internal = WIKI_LINK_PATTERN.sub(internal_label, text)
    return EXTERNAL_LINK_PATTERN.sub(external_label, without_internal)
 
def remove_formatting(text: str) -> str:
    """Strip wiki emphasis markup, simple HTML tags and <nowiki>[...]</nowiki> wrappers.

    The substitutions run in the order listed; the result is stripped of
    leading and trailing whitespace.
    """
    substitutions = (
        # <nowiki>[content]</nowiki> -> content (may span lines)
        (r'<nowiki>\[(.*?)\]</nowiki>', r'\1', re.DOTALL),
        # attribute-less lower-case tags such as <u> or </u>
        (r'<\/?[a-z]+>', '', 0),
        # bold, then italic, then double-underscore markup
        (r"'''(.*?)'''", r'\1', 0),
        (r"''(.*?)''", r'\1', 0),
        (r"__(.*?)__", r'\1', 0),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()
 
"""TALK"""
 
def process_talk_page(url: str, timeout: float = 30.0) -> dict:
    """Collect the community ratings listed on the talk page.

    Fetches the page's edit view, reads the wikitext from the ``wpTextbox1``
    textarea and parses every line that matches ``TALK_PAGE_PATTERN``. Lines
    whose author or description exceed the configured maximum lengths are
    skipped.

    Args:
        url: Edit-view URL of the talk page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script.

    Returns:
        A dict from ``(author, work)`` to its list of ratings; empty when the
        page cannot be fetched or has no edit textarea.
    """
    ratings = {}
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Network error: {e}")
        return ratings

    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if textarea is None:
        print("Error: Could not find talk page content")
        return {}

    for line in textarea.get_text().split('\n'):
        match = TALK_PAGE_PATTERN.match(line.strip())
        if not match:
            continue
        author, description, rating_str = match.groups()
        if len(author) <= MAX_AUTHOR_LENGTH and \
                len(description) <= MAX_DESCRIPTION_LENGTH:
            process_rating(author, description, rating_str, ratings)
    return ratings
 
def process_rating(author: str, desc: str, rating_str: str, ratings: dict):
    """Record one rating in ``ratings`` if it parses and is within range.

    Unparseable strings and values outside ``VALID_RATING_RANGE`` (inclusive)
    are ignored. Accepted values are appended to the list stored under
    ``(author, desc)``.
    """
    try:
        value = float(rating_str)
    except ValueError:
        # Not a number (e.g. "1.2.3"): skip the line.
        return

    if not (VALID_RATING_RANGE[0] <= value <= VALID_RATING_RANGE[1]):
        return

    ratings.setdefault((author, desc), []).append(value)
 
def process_main_page(url: str, timeout: float = 30.0) -> tuple:
    """Fetch the main list page and parse its wikitable.

    Reads the wikitext from the ``wpTextbox1`` textarea of the page's edit
    view and splits it around the table.

    Args:
        url: Edit-view URL of the main list page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script.

    Returns:
        A tuple ``(entries, before, after)``: the parsed table entries, the
        text before the table, and the text after it with a refreshed
        "Last updated by" line. On any failure ``({}, '', '')`` is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        textarea = soup.find('textarea', id='wpTextbox1')
        if textarea is None:
            print("Error: Could not find main page content")
            return {}, '', ''

        raw_text = textarea.get_text()
        before, table_section, after = parse_table_sections(raw_text)
        return process_table_entries(table_section), before, update_last_modified(after)

    except (requests.RequestException, ValueError) as e:
        print(f"Error: {e}")
        return {}, '', ''
 
def parse_table_sections(text: str) -> tuple:
    """Cut ``text`` into (before table, table body, after table).

    The table is delimited by the first ``{|`` and the first ``|}`` after it;
    the body is returned stripped. If either delimiter is missing, an error is
    printed and ``("", "", text)`` is returned.
    """
    try:
        prefix, remainder = text.split('{|', 1)
        body, suffix = remainder.split('|}', 1)
    except ValueError as e:
        # Unpacking fails when a delimiter is absent.
        print(f"Table parsing error: {e}")
        return "", "", text
    return prefix, body.strip(), suffix
 
def update_last_modified(content: str) -> str:
    """Put a fresh "Last updated by ~~~~." line at the top of ``content``.

    If ``content`` (ignoring leading whitespace) already starts with a
    "Last updated by" line, that line is replaced; otherwise the new line is
    prepended and ``content`` is kept in full.

    Args:
        content: The page text that follows the table.

    Returns:
        The text with exactly one fresh stamp line at the top.
    """
    stamp = "\nLast updated by ~~~~.\n"
    trimmed = content.lstrip()
    if trimmed.startswith("Last updated by"):
        # Drop the old stamp line. partition yields "" when nothing follows
        # it, where indexing the result of split() would raise IndexError.
        _, _, remainder = trimmed.partition('\n')
        return stamp + remainder
    return stamp + content
 
def process_table_entries(table_section: str) -> dict:
    """Turn the wikitable body into a dict of entries.

    Newline-started cells are first folded into ``||`` separators, then the
    text is split at row markers; each chunk that starts with ``|`` is handed
    to ``process_single_entry``.
    """
    flattened = table_section.replace('\n|', '||')
    rows = re.split(r'\|\|-\s*', flattened)

    parsed = {}
    for row in rows:
        if not row.strip().startswith('|'):
            continue
        process_single_entry(row, parsed)
    return parsed
 
def process_single_entry(entry: str, entries: dict):
    """Parse one table row and merge it into ``entries``.

    Rows with fewer than six cells, or whose last cell is ``}``, are ignored.
    The entry is keyed by the author and the plain display text of the work
    link.
    """
    cells = [cell.strip() for cell in re.split(r'\s*\|\|\s*', entry.lstrip('|'))]
    if len(cells) < 6 or cells[-1] == "}":
        return

    author, work_link, tuning, notes = cells[:4]
    record = {
        'tuning': tuning,
        'notes': notes,
        'popularity': cells[5],
        'link': work_link,
    }
    display_text = remove_formatting(process_wiki_links(work_link))
    update_entries(author, display_text, record, entries)
 
def update_entries(author: str, work: str, new_entry: dict, entries: dict):
    """Insert ``new_entry`` under ``(author, work)``, resolving duplicates.

    Keys are compared case-insensitively. When a matching key already exists,
    the new entry replaces it only if ``should_replace`` says so; otherwise
    ``entries`` is left unchanged.
    """
    wanted = (author.lower(), work.lower())

    clash = None
    for key in entries:
        if (key[0].lower(), key[1].lower()) == wanted:
            clash = key
            break

    if clash is not None:
        if not should_replace(clash, new_entry, entries):
            return
        del entries[clash]

    entries[(author, work)] = new_entry
 
def should_replace(existing_key: tuple, new_entry: dict, entries: dict) -> bool:
    """Tell whether ``new_entry`` should take the place of the stored entry.

    Returns True when the new entry's integer 'popularity' is strictly larger
    than the stored one's, and also when either value is missing or is not a
    valid integer string.
    """
    try:
        stored_count = int(entries[existing_key]['popularity'])
        incoming_count = int(new_entry['popularity'])
    except (KeyError, ValueError):
        return True
    return incoming_count > stored_count
 
def build_output(before: str, after: str, averages: dict, main_data: dict) -> str:
    """Assemble the full page text: intro, ranked wikitable, trailing text.

    Rated works come first, ordered by a smoothed score
    ``(sum + 5) / (count + 2)`` from highest to lowest; the rating shown in
    the row is the plain mean. Works present in ``main_data`` but without
    ratings follow, in the order they appear in ``main_data``, so repeated
    runs produce the same page.

    Args:
        before: Page text preceding the table.
        after: Page text following the table.
        averages: Dict from ``(author, work)`` to a non-empty list of ratings.
        main_data: Dict from ``(author, work)`` to the entry fields parsed
            from the main list.

    Returns:
        The complete wikitext for the page.
    """
    output = [before, WIKITABLE_HEADER]

    sorted_items = sorted(
        averages.items(),  # pairs of ((author, work), [list_of_ratings])
        # Negated smoothed score, so the best-scored works sort first.
        key=lambda item: -(sum(item[1]) + 5) / (len(item[1]) + 2),
    )

    for (author, work), ratings in sorted_items:
        avg = sum(ratings) / len(ratings)
        count = len(ratings)
        # Rated works missing from the main list get placeholder fields.
        entry = main_data.get((author, work), {**DEFAULT_ENTRY, 'link': work})
        output.append(format_row(author, entry, avg, count))

    # Add unrated entries in main-list order; iterating a set difference
    # would give a different order from one run to the next.
    for key, entry in main_data.items():
        if key not in averages:
            output.append(format_row(key[0], entry))

    return ''.join(output + ["|}\n", after])
 
def format_row(author: str, entry: dict, rating: float = None, count: int = None) -> str:
    """Render one wikitable row for ``entry``.

    The rating cell shows ``rating`` with two decimals and the count cell
    shows ``count``; either cell is left empty when its value is None.
    """
    rating_cell = "" if rating is None else f"{rating:.2f}"
    count_cell = "" if count is None else str(count)
    row_template = "|-\n| {} || {} || {} || {} || {} || {}\n"
    return row_template.format(
        author,
        format_link(entry['link']),
        entry['tuning'],
        entry['notes'],
        rating_cell,
        count_cell,
    )
 
def main():
    """Run the workflow: fetch both pages, build the table, copy it to the clipboard.

    Afterwards prints three sample strings, each followed by its
    ``format_link`` result.
    """
    talk_url = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
    main_url = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"

    averages = process_talk_page(talk_url)
    main_data, before, after = process_main_page(main_url)

    # Nothing is copied when the main list could not be parsed.
    if main_data:
        pyperclip.copy(build_output(before, after, averages, main_data))
        print("Results copied to clipboard")

    # Sample inputs printed before and after formatting.
    samples = (
        "Namoic: [https://benyamind.bandcamp.com/track/chromacro-17-edo <nowiki>[Chromacro]</nowiki>]",
        "[https://soundcloud.com/jollybard/pop-song <nowiki>[pop song]</nowiki>]",
        "Harmony Hacker: <u>[[Gleam]]</u>",
    )
    for sample in samples:
        print(sample + '\n' + format_link(sample) + '\n')
 
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
</syntaxhighlight>