|
|
| (6 intermediate revisions by 2 users not shown) |
| Line 1: |
Line 1: |
| <syntaxhighlight lang="python" line>
| | Ask hkm for the program if you're interested in helping out with the project! |
| | |
| import re
| |
| import requests
| |
| from bs4 import BeautifulSoup
| |
| | |
def process_talk(url, timeout=30):
    """Fetch a wiki edit page and average the ratings listed in its wikitext.

    The wikitext is read from the page's ``wpTextbox1`` textarea.  Every
    line shaped like ``<creator>: <work>. <rating>`` contributes one rating.
    A line is ignored when it does not match that shape, when the creator or
    work is empty or longer than 40 characters, or when the rating is not a
    number in the closed range 0 to 5.

    Args:
        url: Address of the page's ``action=edit`` view.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout a stalled connection would block the script forever.

    Returns:
        A dict mapping ``(creator, work)`` to ``(average, count)``.  The
        average is taken over the submitted ratings plus two fixed seed
        values, 0 and 5; ``count`` is the number of submitted ratings only.
        An empty dict is returned when the request fails or the textarea is
        missing.
    """
    line_pattern = re.compile(r'^([^:]+):\s+([^.]*)\.\s+([0-9.]+)$')
    max_name_length = 40
    seed_count = 2  # every work starts from the seed ratings [0, 5]

    # Only the network call can raise RequestException, so keep the try small.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the edit page content to find the wikitext
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', {'id': 'wpTextbox1'})
    if textarea is None:
        print("Could not find document content in the page")
        return {}

    ratings = {}
    for line in textarea.text.split('\n'):
        match = line_pattern.match(line.strip())
        if not match:
            continue

        thing_a, thing_b, number_str = match.groups()
        if not thing_a or not thing_b:
            continue
        if len(thing_a) > max_name_length or len(thing_b) > max_name_length:
            continue

        # The pattern admits strings such as "1.2.3" that are not numbers.
        try:
            number = float(number_str)
        except ValueError:
            continue
        if not (0 <= number <= 5):
            continue

        # A fresh seed list is created per key, so lists are never shared.
        ratings.setdefault((thing_a, thing_b), [0, 5]).append(number)

    # Calculate averages; the seeds count toward the mean but not the tally.
    return {
        key: (sum(nums) / len(nums), len(nums) - seed_count)
        for key, nums in ratings.items()
    }
| |
| | |
def _parse_pop(text):
    """Return *text* converted to an int, or None when it is not an integer."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def process_main(url, timeout=30):
    """Fetch a wiki edit page and read the rows of its rankings table.

    The wikitext is read from the page's ``wpTextbox1`` textarea and split
    into table rows on the ``|-`` row separator.  A well-formed row has six
    ``||``-separated cells; the first four are used as author, work, tuning
    and notes, and the sixth as pop.  The work cell may be an external link
    of the form ``[link name]``.

    When the same ``(author, name)`` appears in more than one row, the row
    with the larger integer pop is kept.  A row whose pop is not an integer
    never displaces a row whose pop is; a stored row whose pop is not an
    integer is always displaced.

    Args:
        url: Address of the page's ``action=edit`` view.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout a stalled connection would block the script forever.

    Returns:
        A dict mapping ``(author, name)`` to ``(tuning, notes, pop, link)``,
        all strings; ``link`` is ``""`` when the work cell is not a link.
        An empty dict is returned when the request fails or the textarea is
        missing.
    """
    # Only the network call can raise RequestException, so keep the try small.
    try:
        # Fetch the page content
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Ensure the request was successful
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}

    # Parse the HTML to get the raw wikitext from the textarea
    soup = BeautifulSoup(response.text, 'html.parser')
    textarea = soup.find('textarea', id='wpTextbox1')
    if textarea is None:
        print("Could not find document content in the page")
        return {}

    raw_text = textarea.get_text()

    # Join each row's cells onto one line by turning "\n|" into "||", then
    # split the text into rows on the "||-" that row separators have become.
    processed_text = raw_text.replace('\n|', '||')
    entries = re.split(r'\|\|-\s*', processed_text)

    result = {}

    for entry in entries:
        entry = entry.strip()
        if not entry.startswith('|'):
            continue

        # Split the entry into parts, considering possible spaces around ||
        parts = re.split(r'\s*\|\|\s*', entry)

        # The table terminator "|}" ends up as a trailing "}" cell.
        if parts[-1] == "}":
            parts = parts[:-1]

        if parts[0] == "" and len(parts) == 7:
            parts = parts[1:]
        else:
            print("AN UNEXPECTED THING HAPPENED.")
            print("CHECK THAT THE TABLE LOOKS NORMAL.")
            print("IF THE TABLE LOOKS NORMAL, TELL HKM HIS CODE IS BROKEN.")
            print(parts)
            if len(parts) < 6:
                continue  # Not enough columns

        # Extract AUTHOR from the first part
        author = parts[0].lstrip('|').strip()
        if not author:
            continue

        # Split the second part into LINK and NAME.  startswith/endswith are
        # used instead of indexing so an empty cell cannot raise IndexError.
        link_name = parts[1].strip()
        if link_name.startswith("[") and link_name.endswith("]"):
            split_link_name = link_name[1:-1].split(maxsplit=1)
            if len(split_link_name) < 2:
                continue
            link, name = split_link_name
        else:
            name = link_name
            link = ""

        tuning = parts[2].strip()
        notes = parts[3].strip()
        pop = parts[5].strip()

        print("AUTHOR, NAME, LINK, TUNING") ###############################3
        print(author, name, link, tuning)
        print()

        # pop is collected in case the user puts in a new entry at the top
        # accidentally trashing old tuning and notes
        key = (author, name)
        record = (tuning, notes, pop, link)

        # Add to the result dictionary
        if key not in result:
            result[key] = record
            continue

        old_pop = _parse_pop(result[key][2])
        new_pop = _parse_pop(pop)
        if old_pop is None:
            # The stored pop is unusable, so the newer row wins.
            result[key] = record
        elif new_pop is not None and old_pop < new_pop:
            result[key] = record

    return result
| |
|
| |
| | |
| | |
| | |
# Edit views of the two wiki pages: the talk page holds the individual
# ratings, the main page holds the table with tuning, notes and links.
talk = "https://en.xen.wiki/index.php?title=User_talk:Hkm/Rankings&action=edit"
main = "https://en.xen.wiki/index.php?title=User:Hkm/Rankings&action=edit"

averages = process_talk(talk)
data = process_main(main)

# Table header in wikitext.
print('''
{| class="wikitable" style="margin: auto;"
! Creator !! Work !! Tuning !! Notes !! Rating !! Pop ''')

# One table row per rated work, highest average rating first.
ranked = sorted(averages.items(), key=lambda entry: -entry[1][0])
for key, (rating, pop) in ranked:
    author, work = key
    # Works missing from the main table get blank tuning, notes and link.
    tuning, notes, _, link = data.setdefault(key, ("", "", "", ""))
    if not link:
        print(f"|-\n| {author} || {work} || {tuning} || {notes} || {rating:.2f} || {pop}")
    else:
        print(f"|-\n| {author} || [{link} {work}] || {tuning} || {notes} || {rating:.2f} || {pop}")

# Close the table.
print("|}")
| |
| | |
| | |
| </syntaxhighlight>
| |
Ask hkm for the program if you're interested in helping out with the project!