first upload
uploaded api, scraper, as well as KH quotes scraped from websites
This commit is contained in:
parent
07a9f451aa
commit
b9d4d752d7
4 changed files with 221 additions and 0 deletions
19
api.py
Normal file
19
api.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
from flask import Flask, jsonify, request
|
||||
import json
|
||||
import random
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# load quotes from JSON
|
||||
def load_quotes():
    """Read and return the quote collection stored in ``quotes.json``.

    The path is relative, so it is resolved against the current working
    directory of the process.

    Returns:
        The decoded JSON document (whatever top-level value the file holds).

    Raises:
        FileNotFoundError: If ``quotes.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open('quotes.json', 'r', encoding='utf-8') as file:
        return json.load(file)
|
||||
|
||||
# API endpoint for getting a random quote
|
||||
@app.route('/api/random-quote', methods=['GET'])
def random_quote():
    """Serve one randomly chosen entry from the quotes file as JSON.

    The quotes file is re-read through ``load_quotes`` on every request.

    Returns:
        A JSON response holding a single quote, or a JSON error body with
        HTTP status 404 when the file contains no quotes.
    """
    quotes = load_quotes()
    # random.choice raises IndexError on an empty sequence; answer with an
    # explicit error instead of letting that escape as an unhandled exception.
    if not quotes:
        return jsonify({'error': 'No quotes available'}), 404
    return jsonify(random.choice(quotes))
|
||||
|
||||
# Launch the Flask application with debug mode switched on, but only when
# this file is executed directly rather than imported.
if __name__ == '__main__':
    app.run(debug=True)
|
2
checksel.py
Normal file
2
checksel.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
import selenium

# Sanity check: show which Selenium release is importable here.
installed_version = selenium.__version__
print("Selenium version:", installed_version)
|
1
quotes.json
Normal file
1
quotes.json
Normal file
|
@ -0,0 +1 @@
|
|||
["\"So many places I want to see... I know I\u2019ll get there someday. I\u2019ll find a way somehow. I\u2019m sure of it.\"\nARIEL, Kingdom Hearts", "\"You be careful, now, both of you.\"\nDAISY DUCK", "\"All for one, and one for all!\"\nGOOFY, Kingdom Hearts", "\"Cricket's the name. Jiminy Cricket, at your service.\"\nJIMINY CRICKET, Kingdom Hearts", "\"I want to be a part of your life no matter what. That's all.\"\nKAIRI, Kingdom Hearts III", "\"There will always be a door to the light.\"\nKING MICKEY, Kingdom Hearts II", "\"We may never meet again, but we'll never forget each other.\"\nLEON, Kingdom Hearts", "\"The heart of all kingdoms, the heart of all that lives. A dominion fit to be called Kingdom Hearts must be MY dominion.\"\nMALEFICENT, Kingdom Hearts II", "\"The name is Merlin! I'm a powerful wizard, and I'll have you know a lot of wisdom comes with age.\"\nMERLIN, Kingdom Hearts Birth by Sleep", "\"Kid, I got two words of advice for you: Attack!\"\nPHILOCTETES, Kingdom Hearts", "\"What really makes someone a hero isn't what they wear or what they say\u2014it's the things they do and how they treat everyone.\"\nQUEEN MINNIE MOUSE, Kingdom Hearts Birth by Sleep", "\"I'm going to face my past.\"\nSIMBA, Kingdom Hearts II", "\"My friends are my power!\"\nSORA, Kingdom Hearts", "\"But the girl could prove useful. And I've got the Heartless on my side. Triton, my old friend...Your day is coming.\"\nURSULA, Kingdom Hearts"]
|
199
scraper.py
Normal file
199
scraper.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
import json  # used to save the scraped quotes; the two imports above bring in Selenium
|
||||
|
||||
# Function to configure and return a WebDriver instance
|
||||
def configure_driver():
    """Create a Chrome WebDriver with default settings and return it."""
    return webdriver.Chrome()
|
||||
|
||||
# Function to scrape blockquote texts from a given character page
|
||||
def scrape_character_page(driver, url):
    """Collect the text of every ``<blockquote>`` element on one page.

    Args:
        driver: WebDriver instance used to load the page.
        url: Address of the character page to open.

    Returns:
        A list containing the ``text`` of each blockquote element found,
        in the order ``find_elements`` reported them.
    """
    # Load the page before querying it.
    driver.get(url)

    texts = []
    for element in driver.find_elements(By.TAG_NAME, "blockquote"):
        texts.append(element.text)
    return texts
|
||||
|
||||
def main():
    """Scrape blockquotes from a fixed set of character pages into a file.

    Each listed page is visited with one shared WebDriver. The blockquote
    texts from all pages are gathered into a single list, which is then
    written to ``quotes.json`` in the current working directory. The driver
    is shut down whether or not scraping succeeds; if scraping raises, the
    exception propagates and the file is not written.
    """
    # Character pages to visit, in order.
    character_urls = [
        'https://www.khdatabase.com/Ansem',
        'https://www.khdatabase.com/Ariel',
        'https://www.khdatabase.com/Cloud',
        'https://www.khdatabase.com/Daisy_Duck',
        'https://www.khdatabase.com/Donald_Duck',
        'https://www.khdatabase.com/Goofy',
        'https://www.khdatabase.com/Hades',
        'https://www.khdatabase.com/Hercules',
        'https://www.khdatabase.com/Ice_Titan',
        'https://www.khdatabase.com/Jiminy_Cricket',
        'https://www.khdatabase.com/Kairi',
        'https://www.khdatabase.com/King_Mickey_Mouse',
        'https://www.khdatabase.com/Lava_Titan',
        'https://www.khdatabase.com/Leon',
        'https://www.khdatabase.com/Maleficent',
        'https://www.khdatabase.com/Merlin',
        'https://www.khdatabase.com/Moogle',
        'https://www.khdatabase.com/Philoctetes',
        'https://www.khdatabase.com/Queen_Minnie_Mouse',
        'https://www.khdatabase.com/Riku',
        'https://www.khdatabase.com/Rock_Titan',
        'https://www.khdatabase.com/Simba',
        'https://www.khdatabase.com/Sora',
        'https://www.khdatabase.com/Tornado_Titan',
        'https://www.khdatabase.com/Ursula',
    ]

    driver = configure_driver()
    collected = []

    try:
        for page_url in character_urls:
            # Append this page's blockquote texts to the running list.
            collected += scrape_character_page(driver, page_url)
    finally:
        # Always release the browser, even if a page failed to load.
        driver.quit()

    # Save everything that was gathered as a JSON array.
    with open('quotes.json', 'w') as out_file:
        json.dump(collected, out_file)
|
||||
|
||||
|
||||
# Python's way to check if this script is being run as the main program
|
||||
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def configure_driver():
|
||||
# driver = webdriver.Chrome()
|
||||
# return driver
|
||||
|
||||
|
||||
# def get_character_urls(driver, url):
|
||||
# driver.get(url)
|
||||
# character_links = driver.find_elements(By.CSS_SELECTOR, "div.mw-category-group ul li a")
|
||||
# urls = [link.get_attribute('href') for link in character_links]
|
||||
|
||||
# return urls
|
||||
|
||||
# def main():
|
||||
# characters_list_page = "https://www.khdatabase.com/Category:Kingdom_Hearts_characters"
|
||||
|
||||
# driver = configure_driver()
|
||||
|
||||
# try:
|
||||
# character_urls = get_character_urls(driver, characters_list_page)
|
||||
# for url in character_urls:
|
||||
# print(url)
|
||||
# finally:
|
||||
# driver.quit()
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
||||
# #elements = driver.find_elements(By.TAG_NAME, "blockquote")
|
||||
|
||||
# #for element in elements:
|
||||
# # print(element.text)
|
||||
|
||||
# #driver.quit()
|
Loading…
Reference in a new issue