From b9d4d752d73d9d037ea46f549fefe6d0aa277968 Mon Sep 17 00:00:00 2001 From: migs <154289523+miggymofongo@users.noreply.github.com> Date: Wed, 27 Dec 2023 17:30:27 -0800 Subject: [PATCH] first upload uploaded api, scraper, as well as KH quotes scraped from websites --- api.py | 19 +++++ checksel.py | 2 + quotes.json | 1 + scraper.py | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 221 insertions(+) create mode 100644 api.py create mode 100644 checksel.py create mode 100644 quotes.json create mode 100644 scraper.py diff --git a/api.py b/api.py new file mode 100644 index 0000000..5f7e65f --- /dev/null +++ b/api.py @@ -0,0 +1,19 @@ +from flask import Flask, jsonify, request +import json +import random + +app = Flask(__name__) + +# load quotes from JSON +def load_quotes(): + with open('quotes.json', 'r') as file: + return json.load(file) + +# API endpoint for getting a random quote +@app.route('/api/random-quote', methods=['GET']) +def random_quote(): + quotes = load_quotes() + return jsonify(random.choice(quotes)) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/checksel.py b/checksel.py new file mode 100644 index 0000000..202e4d9 --- /dev/null +++ b/checksel.py @@ -0,0 +1,2 @@ +import selenium +print("Selenium version:", selenium.__version__) diff --git a/quotes.json b/quotes.json new file mode 100644 index 0000000..9fc0875 --- /dev/null +++ b/quotes.json @@ -0,0 +1 @@ +["\"So many places I want to see... I know I\u2019ll get there someday. I\u2019ll find a way somehow. I\u2019m sure of it.\"\nARIEL, Kingdom Hearts", "\"You be careful, now, both of you.\"\nDAISY DUCK", "\"All for one, and one for all!\"\nGOOFY, Kingdom Hearts", "\"Cricket's the name. Jiminy Cricket, at your service.\"\nJIMINY CRICKET, Kingdom Hearts", "\"I want to be a part of your life no matter what. That's all.\"\nKAIRI, Kingdom Hearts III", "\"There will always be a door to the light.\"\nKING MICKEY, Kingdom Hearts II", "\"We may never meet again, but we'll never forget each other.\"\nLEON, Kingdom Hearts", "\"The heart of all kingdoms, the heart of all that lives. A dominion fit to be called Kingdom Hearts must be MY dominion.\"\nMALEFICENT, Kingdom Hearts II", "\"The name is Merlin! I'm a powerful wizard, and I'll have you know a lot of wisdom comes with age.\"\nMERLIN, Kingdom Hearts Birth by Sleep", "\"Kid, I got two words of advice for you: Attack!\"\nPHILOCTETES, Kingdom Hearts", "\"What really makes someone a hero isn't what they wear or what they say\u2014it's the things they do and how they treat everyone.\"\nQUEEN MINNIE MOUSE, Kingdom Hearts Birth by Sleep", "\"I'm going to face my past.\"\nSIMBA, Kingdom Hearts II", "\"My friends are my power!\"\nSORA, Kingdom Hearts", "\"But the girl could prove useful. And I've got the Heartless on my side. Triton, my old friend...Your day is coming.\"\nURSULA, Kingdom Hearts"] \ No newline at end of file diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..39a4e0a --- /dev/null +++ b/scraper.py @@ -0,0 +1,199 @@ +from selenium import webdriver +from selenium.webdriver.common.by import By +import json #the first few of lines import different selenium libraries + +# Function to configure and return a WebDriver instance +def configure_driver(): + # Configure the driver (e.g., using Chrome) + driver = webdriver.Chrome() + return driver + +# Function to scrape blockquote texts from a given character page +def scrape_character_page(driver, url): + # Navigate to the character page + driver.get(url) + + # Find all blockquote elements and print their text + blockquotes = driver.find_elements(By.TAG_NAME, "blockquote") + # Return a list of texts from each blockquote element + return [blockquote.text for blockquote in blockquotes] + +def main(): + # List of character page URLs to be scraped + character_urls = [ + 'https://www.khdatabase.com/Ansem', + 'https://www.khdatabase.com/Ariel', + 'https://www.khdatabase.com/Cloud', + 'https://www.khdatabase.com/Daisy_Duck', + 'https://www.khdatabase.com/Donald_Duck', + 'https://www.khdatabase.com/Goofy', + 'https://www.khdatabase.com/Hades', + 'https://www.khdatabase.com/Hercules', + 'https://www.khdatabase.com/Ice_Titan', + 'https://www.khdatabase.com/Jiminy_Cricket', + 'https://www.khdatabase.com/Kairi', + 'https://www.khdatabase.com/King_Mickey_Mouse', + 'https://www.khdatabase.com/Lava_Titan', + 'https://www.khdatabase.com/Leon', + 'https://www.khdatabase.com/Maleficent', + 'https://www.khdatabase.com/Merlin', + 'https://www.khdatabase.com/Moogle', + 'https://www.khdatabase.com/Philoctetes', + 'https://www.khdatabase.com/Queen_Minnie_Mouse', + 'https://www.khdatabase.com/Riku', + 'https://www.khdatabase.com/Rock_Titan', + 'https://www.khdatabase.com/Simba', + 'https://www.khdatabase.com/Sora', + 'https://www.khdatabase.com/Tornado_Titan', + 'https://www.khdatabase.com/Ursula', + ] + + # Configure the WebDriver + driver = configure_driver() + + # Initialize an empty list to store all scraped quotes + all_quotes = [] + try: + # Iterate over each URL in the character_urls list + for url in character_urls: + # Scrape blockquote texts from the current URL + quotes = scrape_character_page(driver, url) + # Add the scraped quotes to the all_quotes list + all_quotes.extend(quotes) + finally: + # Close the WebDriver once scraping is done or if an error occurs + driver.quit() + + # Write the collected quotes to a JSON file + with open('quotes.json', 'w') as file: + # Convert the list of quotes to JSON format and save i + json.dump(all_quotes, file) + + +# Python's way to check if this script is being run as the main program +if __name__ == "__main__": + main() + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# def configure_driver(): +# driver = webdriver.Chrome() +# return driver + + +# def get_character_urls(driver, url): +# driver.get(url) +# character_links = driver.find_elements(By.CSS_SELECTOR, "div.mw-category-group ul li a") +# urls = [link.get_attribute('href') for link in character_links] + +# return urls + +# def main(): +# characters_list_page = "https://www.khdatabase.com/Category:Kingdom_Hearts_characters" + +# driver = configure_driver() + +# try: +# character_urls = get_character_urls(driver, characters_list_page) +# for url in character_urls: +# print(url) +# finally: +# driver.quit() + +# if __name__ == "__main__": +# main() + +# #elements = driver.find_elements(By.TAG_NAME, "blockquote") + +# #for element in elements: +# # print(element.text) + +# #driver.quit()