User:Hkm/Rankings program: Difference between revisions
ArrowHead294 (talk | contribs) mNo edit summary |
No edit summary |
||
| Line 1: | Line 1: | ||
<syntaxhighlight lang="python" line> | <syntaxhighlight lang="python" line=""> | ||
""" | |||
Wiki Rating Processor | |||
This script processes community ratings from a talk page and merges them with main list entries | |||
from a xenharmonic wiki. Results are formatted into a sorted wikitable and copied to clipboard. | |||
Disclaimer: a lot of this is AI-generated code, because I don't have this much time on my hands. | |||
""" | |||
import re | import re | ||
import requests | import requests | ||
| Line 8: | Line 14: | ||
import pyperclip | import pyperclip | ||
# Regular expressions precompiled for better performance
# Internal wiki link "[[target|label]]": group 1 is the target, group 2 the
# optional label that follows the pipe.
WIKI_LINK_PATTERN = re.compile(r'\[\[(.*?)(?:\|(.*?))?\]\]')
# External link "[http(s)://url label]": group 1 is the URL, group 2 the label.
# The label is matched greedily, so it extends to the last "]" on the line.
EXTERNAL_LINK_PATTERN = re.compile(r'\[(https?://\S+)\s+(.*)\]')
# Talk-page rating line of the form "Author: description. rating".
# Group 1 is everything before the first colon, group 3 the trailing run of
# digits and dots.
TALK_PAGE_PATTERN = re.compile(r'^([^:]+):\s+(.*)\.\s+([0-9.]+)$')
# Constants for input validation
MAX_AUTHOR_LENGTH = 40
MAX_DESCRIPTION_LENGTH = 100
# Inclusive (lowest, highest) rating accepted by process_rating.
VALID_RATING_RANGE = (0, 5)
# Opening of the generated wikitable. The backslash at the end of the caption
# line is a line continuation inside the string, so the caption is emitted as
# a single line.
WIKITABLE_HEADER = '''{| class="wikitable sortable" style="margin: auto; max-width: 800px; width: 100%;"
|+Xenharmonic works sorted by community ranking. The "R" column is the rating of the work, \
and the "#" column is the number of ratings given to that work.
! Creator !! Work !! Tuning !! Notes !! R !! #
'''
# Field defaults used by build_output for a rated work that has no entry in
# the main list.
DEFAULT_ENTRY = {
    'tuning': '',
    'notes': '',
    'popularity': '0',
    'link': ''
}
def format_link(text: str) -> str:
    """Apply the table's link styling to every link in *text*.

    Existing formatting is stripped with ``remove_formatting`` first, then:

    * internal links ``[[target]]`` / ``[[target|label]]`` are wrapped in
      ``<u>...</u>``;
    * external links whose URL contains ``bandcamp`` are left as they are;
    * external links whose URL contains ``youtube`` are wrapped in italics;
    * any other external link gets its label wrapped as
      ``<nowiki>[label]</nowiki>``.

    Args:
        text: Wikitext of a single table cell.

    Returns:
        The cell text with each link re-styled individually.
    """
    text = remove_formatting(text)

    def replace_internal(match):
        """Underline one ``[[...]]`` link, trimming target and label."""
        target, pipe, display = match.group(1).partition('|')
        target = target.strip()
        if not pipe:
            return f'<u>[[{target}]]</u>'
        return f'<u>[[{target}|{display.strip()}]]</u>'

    processed_text = re.sub(r'\[\[(.*?)\]\]', replace_internal, text, flags=re.DOTALL)

    def replace_external(match):
        """Style one single-bracket link according to its host."""
        content = match.group(1)
        parts = content.split(' ', 1)
        url = parts[0].strip()
        label = parts[1].strip() if len(parts) > 1 else None
        # Lower-case only for the host checks; the emitted URL keeps its
        # original case because URL paths are case-sensitive.
        url_key = url.lower()
        if 'bandcamp' in url_key:
            return f'[{content}]'
        if 'youtube' in url_key:
            reconstructed = url if label is None else f'{url} {label}'
            return f"''[{reconstructed}]''"
        if label is None:
            # Nothing to wrap in <nowiki>; leave the bracket untouched rather
            # than failing on the missing label.
            return match.group(0)
        return f'[{url} <nowiki>[{label}]</nowiki>]'

    # The look-arounds skip the double brackets of internal links.
    processed_text = re.sub(r'(?<!\[)\[(?!\[)(.*?)\]', replace_external,
                            processed_text, flags=re.DOTALL)
    return processed_text
def process_wiki_links(text: str) -> str:
    """Replace every link in *text* with its visible label.

    Internal links become their label, or their target when no label is
    given; external links become their stripped label.
    """
    def _internal_label(found):
        # An absent or empty label falls back to the link target.
        return found.group(2) or found.group(1)

    def _external_label(found):
        label = found.group(2)
        return label.strip() if label else ''

    without_internal = WIKI_LINK_PATTERN.sub(_internal_label, text)
    return EXTERNAL_LINK_PATTERN.sub(_external_label, without_internal)
def remove_formatting(text: str) -> str:
    """Strip wiki/HTML markup from *text* and trim surrounding whitespace.

    The substitutions run in a fixed order: ``<nowiki>[...]</nowiki>`` is
    unwrapped to its inner text, attribute-less lowercase tags such as
    ``<u>`` / ``</u>`` are dropped, then ``'''bold'''``, ``''italic''`` and
    ``__double underscore__`` markers are removed while keeping their content.
    """
    substitutions = (
        # (pattern, replacement, flags)
        (r'<nowiki>\[(.*?)\]</nowiki>', r'\1', re.DOTALL),
        (r'<\/?[a-z]+>', '', 0),
        (r"'''(.*?)'''", r'\1', 0),
        (r"''(.*?)''", r'\1', 0),
        (r"__(.*?)__", r'\1', 0),
    )
    cleaned = text
    for pattern, replacement, flags in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
    return cleaned.strip()
"""TALK""" | |||
def process_talk_page(url: str, timeout: float = 30.0) -> dict:
    """Collect community ratings from the talk page's edit view.

    Each line of the page text that matches ``TALK_PAGE_PATTERN`` and stays
    within the author/description length limits is handed to
    ``process_rating``.

    Args:
        url: Address of the page's edit view; the wikitext is read from the
            ``<textarea id="wpTextbox1">`` element.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A dict from ``(author, work)`` to the list of ratings for that work.
        It is empty when the request fails or the textarea is missing.
    """
    ratings = {}
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        print(f"Network error: {e}")
        return ratings

    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if textarea is None:
        print("Error: Could not find talk page content")
        return {}

    for line in textarea.text.split('\n'):
        match = TALK_PAGE_PATTERN.match(line.strip())
        if match is None:
            continue
        author, description, rating_str = match.groups()
        # Over-long fields are ignored.
        if len(author) <= MAX_AUTHOR_LENGTH and \
                len(description) <= MAX_DESCRIPTION_LENGTH:
            process_rating(author, description, rating_str, ratings)
    return ratings
def process_rating(author: str, desc: str, rating_str: str, ratings: dict):
    """Record one rating in *ratings* if it parses and is within range.

    The value is appended to the list stored under ``(author, desc)``,
    creating that list on first use. Text that is not a valid float, or a
    value outside ``VALID_RATING_RANGE`` (inclusive), is ignored.
    """
    try:
        value = float(rating_str)
    except ValueError:
        # Unparseable rating text (e.g. "1.2.3"): skip the line.
        return
    lowest = VALID_RATING_RANGE[0]
    highest = VALID_RATING_RANGE[1]
    if not (lowest <= value <= highest):
        return
    ratings.setdefault((author, desc), []).append(value)
def process_main_page(url: str, timeout: float = 30.0) -> tuple:
    """Fetch the main list page and split it around its wikitable.

    Args:
        url: Address of the page's edit view; the wikitext is read from the
            ``<textarea id="wpTextbox1">`` element.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A 3-tuple ``(entries, before, after)``: the parsed table entries, the
        text preceding the table, and the text following it after it has been
        passed through ``update_last_modified``. On a network error, a
        ``ValueError`` or a missing textarea the result is ``({}, '', '')``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        textarea = soup.find('textarea', id='wpTextbox1')
        if textarea is None:
            print("Error: Could not find main page content")
            return {}, '', ''
        raw_text = textarea.get_text()
        before, table_section, after = parse_table_sections(raw_text)
        return process_table_entries(table_section), before, update_last_modified(after)
    except (requests.RequestException, ValueError) as e:
        # Timeouts are RequestException subclasses, so they are reported here.
        print(f"Error: {e}")
        return {}, '', ''
def parse_table_sections(text: str) -> tuple:
    """Cut *text* into the parts before, inside and after the first wikitable.

    The table runs from the first ``{|`` to the first ``|}`` after it; both
    delimiters are dropped and the table body is stripped. If either delimiter
    is missing, the problem is printed and ``("", "", text)`` is returned.
    """
    try:
        head, remainder = text.split('{|', 1)
        body, tail = remainder.split('|}', 1)
    except ValueError as e:
        # Unpacking fails when a delimiter is absent.
        print(f"Table parsing error: {e}")
        return "", "", text
    return head, body.strip(), tail
def update_last_modified(content: str) -> str:
    """Put a fresh "Last updated by ~~~~." line at the top of *content*.

    If *content* (ignoring leading whitespace) already begins with a
    "Last updated by" line, that line is replaced; otherwise the new line is
    prepended and *content* is kept in full.

    Args:
        content: The page text that follows the wikitable.

    Returns:
        The text with exactly one up-to-date signature line in front.
    """
    marker = "Last updated by"
    stamp = "\nLast updated by ~~~~.\n"
    if not content.lstrip().startswith(marker):
        return stamp + content
    # partition() yields an empty remainder when the old signature is the
    # final line, where indexing the result of split() would raise IndexError.
    _, _, remainder = content[content.find(marker):].partition('\n')
    return stamp + remainder
def process_table_entries(table_section: str) -> dict:
    """Parse the body of the wikitable into a dict of entries.

    Every newline followed by a pipe is first rewritten to ``||`` so that
    cells written one per line look like inline cells. The text is then cut
    at each ``||-`` row separator, and every chunk that starts with a pipe is
    passed to ``process_single_entry``.
    """
    entries = {}
    flattened = table_section.replace('\n|', '||')
    for row in re.split(r'\|\|-\s*', flattened):
        if not row.strip().startswith('|'):
            # Not a data row, e.g. the chunk before the first separator.
            continue
        process_single_entry(row, entries)
    return entries
def process_single_entry(entry: str, entries: dict):
    """Turn one table row into an entry and merge it into *entries*.

    The row is split on ``||`` into stripped cells. Rows with fewer than six
    cells, or whose last cell is ``}``, are ignored. The entry is keyed by the
    first cell (author) and the plain text of the second cell (work link with
    links and formatting removed).
    """
    cells = [cell.strip() for cell in re.split(r'\s*\|\|\s*', entry.lstrip('|'))]
    if len(cells) < 6 or cells[-1] == "}":
        return
    author, work_link, tuning, notes = cells[:4]
    record = {
        'tuning': tuning,
        'notes': notes,
        # Sixth cell; the fifth cell is not stored.
        'popularity': cells[5],
        'link': work_link,
    }
    work_title = remove_formatting(process_wiki_links(work_link))
    update_entries(author, work_title, record, entries)
def update_entries(author: str, work: str, new_entry: dict, entries: dict):
    """Insert *new_entry* under ``(author, work)``, resolving duplicates.

    Keys are compared case-insensitively. When an equivalent key already
    exists, the old entry is replaced (under the new key's spelling) only if
    ``should_replace`` approves; otherwise *entries* is left unchanged.
    """
    wanted = (author.lower(), work.lower())
    clash = None
    for key in entries:
        if (key[0].lower(), key[1].lower()) == wanted:
            clash = key
            break

    if clash is None:
        entries[(author, work)] = new_entry
        return
    if should_replace(clash, new_entry, entries):
        del entries[clash]
        entries[(author, work)] = new_entry
def should_replace(existing_key: tuple, new_entry: dict, entries: dict) -> bool:
    """Tell whether *new_entry* should displace the entry at *existing_key*.

    Returns True when the new entry's integer ``'popularity'`` is strictly
    greater than the stored one. Also returns True when either value is
    missing or cannot be parsed as an integer.
    """
    try:
        incumbent = int(entries[existing_key]['popularity'])
        challenger = int(new_entry['popularity'])
    except (KeyError, ValueError):
        return True
    return challenger > incumbent
def build_output(before: str, after: str, averages: dict, main_data: dict) -> str:
    """Assemble the full page text: preamble, rating table, trailer.

    Args:
        before: Text placed in front of the table.
        after: Text placed behind the table.
        averages: Despite the name, a dict from ``(author, work)`` to that
            work's list of ratings.
        main_data: Dict from ``(author, work)`` to the entry parsed from the
            main list.

    Returns:
        The page text. Rated works come first, ordered by a smoothed score;
        works from *main_data* without ratings follow in *main_data* order.
    """
    output = [before, WIKITABLE_HEADER]

    def smoothed_score(item):
        # (sum + 5) / (n + 2): the plain average after adding two extra
        # ratings that total 5, so works with few ratings are pulled toward
        # 2.5. Negated to sort in descending order.
        _, ratings = item
        return -(sum(ratings) + 5) / (len(ratings) + 2)

    for (author, work), ratings in sorted(averages.items(), key=smoothed_score):
        count = len(ratings)
        # The displayed rating is the plain average, not the smoothed score.
        avg = sum(ratings) / count
        entry = main_data.get((author, work), {**DEFAULT_ENTRY, 'link': work})
        output.append(format_row(author, entry, avg, count))

    # Add unrated entries. Walking main_data keeps its insertion order; a set
    # difference would reorder these rows from one run to the next.
    for (author, work), entry in main_data.items():
        if (author, work) not in averages:
            output.append(format_row(author, entry))

    output.extend(["|}\n", after])
    return ''.join(output)
def format_row(author: str, entry: dict, rating: float = None, count: int = None) -> str:
    """Render one wikitable row for *entry*.

    The rating is shown with two decimals and the count as-is; either cell is
    left empty when its value is None. The work cell is the entry's link run
    through ``format_link``.
    """
    rating_cell = "" if rating is None else f"{rating:.2f}"
    count_cell = "" if count is None else str(count)
    return "|-\n| {} || {} || {} || {} || {} || {}\n".format(
        author,
        format_link(entry['link']),
        entry['tuning'],
        entry['notes'],
        rating_cell,
        count_cell,
    )
def main():
    """Fetch both pages, build the merged table and copy it to the clipboard.

    Nothing is copied when the main page yields no entries. Afterwards three
    sample strings are printed together with their ``format_link`` output.
    """
    talk_url = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
    main_url = "https://en.xen.wiki/index.php?title=List_of_xenharmonic_music_by_community_ratings&action=edit"

    ratings_by_work = process_talk_page(talk_url)
    main_data, before, after = process_main_page(main_url)
    if main_data:
        page_text = build_output(before, after, ratings_by_work, main_data)
        pyperclip.copy(page_text)
        print("Results copied to clipboard")

    # NOTE(review): the source's indentation was lost; these sample prints are
    # assumed to sit at function level inside main() - confirm.
    samples = (
        "Namoic: [https://benyamind.bandcamp.com/track/chromacro-17-edo <nowiki>[Chromacro]</nowiki>]",
        "[https://soundcloud.com/jollybard/pop-song <nowiki>[pop song]</nowiki>]",
        "Harmony Hacker: <u>[[Gleam]]</u>",
    )
    for n in samples:
        print(n + '\n' + format_link(n) + '\n')


if __name__ == "__main__":
    main()
</syntaxhighlight> | |||