first upload

Uploaded the API, the scraper, and the Kingdom Hearts (KH) quotes scraped from websites.
This commit is contained in:
migs 2023-12-27 17:30:27 -08:00 committed by GitHub
parent 07a9f451aa
commit b9d4d752d7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 221 additions and 0 deletions

19
api.py Normal file
View file

@ -0,0 +1,19 @@
from flask import Flask, jsonify, request
import json
import random
app = Flask(__name__)
# load quotes from JSON
def load_quotes():
    """Load the quotes stored in ``quotes.json``.

    The path is relative, so the file is resolved against the current
    working directory of the process.

    Returns:
        The decoded JSON document. The ``quotes.json`` shipped alongside
        this module is a list of strings.

    Raises:
        FileNotFoundError: if ``quotes.json`` does not exist.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Pin the encoding so decoding does not depend on the platform's locale
    # default (e.g. cp1252 on Windows) if the file ever holds raw non-ASCII.
    with open('quotes.json', 'r', encoding='utf-8') as file:
        return json.load(file)
# API endpoint for getting a random quote
@app.route('/api/random-quote', methods=['GET'])
def random_quote():
    """Return one randomly chosen quote as a JSON response.

    The quotes are re-read through ``load_quotes()`` on every request.

    Returns:
        A JSON response holding a single quote, or a JSON error object with
        HTTP status 404 when the loaded quote collection is empty.
    """
    quotes = load_quotes()
    if not quotes:
        # random.choice() raises IndexError on an empty sequence, which would
        # otherwise reach the client as an opaque 500 response.
        return jsonify({'error': 'No quotes available'}), 404
    return jsonify(random.choice(quotes))
if __name__ == '__main__':
    import os

    # debug=True turns on the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Make it opt-in:
    # set FLASK_DEBUG=1 (or "true") for local development.
    app.run(debug=os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true'))

2
checksel.py Normal file
View file

@ -0,0 +1,2 @@
# Sanity check: confirm Selenium can be imported and report its version.
import selenium

installed_version = selenium.__version__
print(f"Selenium version: {installed_version}")

1
quotes.json Normal file
View file

@ -0,0 +1 @@
["\"So many places I want to see... I know I\u2019ll get there someday. I\u2019ll find a way somehow. I\u2019m sure of it.\"\nARIEL, Kingdom Hearts", "\"You be careful, now, both of you.\"\nDAISY DUCK", "\"All for one, and one for all!\"\nGOOFY, Kingdom Hearts", "\"Cricket's the name. Jiminy Cricket, at your service.\"\nJIMINY CRICKET, Kingdom Hearts", "\"I want to be a part of your life no matter what. That's all.\"\nKAIRI, Kingdom Hearts III", "\"There will always be a door to the light.\"\nKING MICKEY, Kingdom Hearts II", "\"We may never meet again, but we'll never forget each other.\"\nLEON, Kingdom Hearts", "\"The heart of all kingdoms, the heart of all that lives. A dominion fit to be called Kingdom Hearts must be MY dominion.\"\nMALEFICENT, Kingdom Hearts II", "\"The name is Merlin! I'm a powerful wizard, and I'll have you know a lot of wisdom comes with age.\"\nMERLIN, Kingdom Hearts Birth by Sleep", "\"Kid, I got two words of advice for you: Attack!\"\nPHILOCTETES, Kingdom Hearts", "\"What really makes someone a hero isn't what they wear or what they say\u2014it's the things they do and how they treat everyone.\"\nQUEEN MINNIE MOUSE, Kingdom Hearts Birth by Sleep", "\"I'm going to face my past.\"\nSIMBA, Kingdom Hearts II", "\"My friends are my power!\"\nSORA, Kingdom Hearts", "\"But the girl could prove useful. And I've got the Heartless on my side. Triton, my old friend...Your day is coming.\"\nURSULA, Kingdom Hearts"]

199
scraper.py Normal file
View file

@ -0,0 +1,199 @@
from selenium import webdriver
from selenium.webdriver.common.by import By
import json  # The two imports above come from Selenium; json is used to save the scraped quotes
# Function to configure and return a WebDriver instance
def configure_driver():
    """Create and return a new Chrome WebDriver built with Selenium's defaults."""
    return webdriver.Chrome()
# Function to scrape blockquote texts from a given character page
def scrape_character_page(driver, url):
    """Collect the text of every ``<blockquote>`` element on a character page.

    Args:
        driver: WebDriver used to load the page.
        url: Address of the character page to open.

    Returns:
        A list holding the ``text`` of each blockquote element, in the order
        the driver returned them; empty when the page has none.
    """
    driver.get(url)
    return [
        element.text
        for element in driver.find_elements(By.TAG_NAME, "blockquote")
    ]
# Root of the site whose character pages are scraped.
_BASE_URL = 'https://www.khdatabase.com/'

# Page names (appended to _BASE_URL) of the characters to scrape.
_CHARACTER_PAGES = (
    'Ansem',
    'Ariel',
    'Cloud',
    'Daisy_Duck',
    'Donald_Duck',
    'Goofy',
    'Hades',
    'Hercules',
    'Ice_Titan',
    'Jiminy_Cricket',
    'Kairi',
    'King_Mickey_Mouse',
    'Lava_Titan',
    'Leon',
    'Maleficent',
    'Merlin',
    'Moogle',
    'Philoctetes',
    'Queen_Minnie_Mouse',
    'Riku',
    'Rock_Titan',
    'Simba',
    'Sora',
    'Tornado_Titan',
    'Ursula',
)


def main():
    """Scrape every listed character page and save the quotes to ``quotes.json``.

    One WebDriver is created and reused for all pages. The blockquote texts
    from each page are accumulated in page order and written as a single JSON
    list to ``quotes.json`` in the current working directory.

    Any exception raised while scraping propagates to the caller after the
    driver has been shut down.
    """
    character_urls = [_BASE_URL + page for page in _CHARACTER_PAGES]

    driver = configure_driver()
    all_quotes = []
    try:
        for url in character_urls:
            all_quotes.extend(scrape_character_page(driver, url))
    finally:
        # Always shut the browser down, even when a page fails to load.
        driver.quit()

    # Only reached when every page was scraped without raising, so a failed
    # run never overwrites an existing quotes.json. The encoding is explicit
    # so the file is written the same way on every platform.
    with open('quotes.json', 'w', encoding='utf-8') as file:
        json.dump(all_quotes, file)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
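# NOTE: disabled earlier draft, kept for reference. It listed the character
# page URLs found on the site's category page and printed them.
# def configure_driver():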
# driver = webdriver.Chrome()
# return driver
# def get_character_urls(driver, url):
# driver.get(url)
# character_links = driver.find_elements(By.CSS_SELECTOR, "div.mw-category-group ul li a")
# urls = [link.get_attribute('href') for link in character_links]
# return urls
# def main():
# characters_list_page = "https://www.khdatabase.com/Category:Kingdom_Hearts_characters"
# driver = configure_driver()
# try:
# character_urls = get_character_urls(driver, characters_list_page)
# for url in character_urls:
# print(url)
# finally:
# driver.quit()
# if __name__ == "__main__":
# main()
# #elements = driver.find_elements(By.TAG_NAME, "blockquote")
# #for element in elements:
# # print(element.text)
# #driver.quit()