# webscraper/scraper.py

from selenium import webdriver  
from selenium.webdriver.common.by import By  
import json  # standard library; used to write the scraped quotes to quotes.json

# Build the browser session that the rest of the script drives.
def configure_driver():
    """Create and return a Chrome WebDriver.

    Returns:
        A new ``webdriver.Chrome`` instance constructed with no arguments.
    """
    return webdriver.Chrome()

# Collect the text of every <blockquote> on a single character page.
def scrape_character_page(driver, url):
    """Load ``url`` in ``driver`` and return its blockquote texts.

    Args:
        driver: WebDriver used to load the page and query its elements.
        url: Address of the character page to visit.

    Returns:
        A list holding the ``.text`` of each element matched by the
        ``blockquote`` tag name, in the order ``find_elements`` returned them.
    """
    driver.get(url)
    return [
        element.text
        for element in driver.find_elements(By.TAG_NAME, "blockquote")
    ]

def main():
    """Scrape every listed character page and save the quotes as JSON.

    Opens one WebDriver, visits each URL in turn, gathers the blockquote
    texts into a single flat list, and writes that list to ``quotes.json``
    in the current working directory. The driver is always shut down, even
    when a page raises; in that case the exception propagates and no file
    is written.
    """
    # Character pages to visit, in scraping order.
    character_urls = (
        'https://www.khdatabase.com/Ansem',
        'https://www.khdatabase.com/Ariel',
        'https://www.khdatabase.com/Cloud',
        'https://www.khdatabase.com/Daisy_Duck',
        'https://www.khdatabase.com/Donald_Duck',
        'https://www.khdatabase.com/Goofy',
        'https://www.khdatabase.com/Hades',
        'https://www.khdatabase.com/Hercules',
        'https://www.khdatabase.com/Ice_Titan',
        'https://www.khdatabase.com/Jiminy_Cricket',
        'https://www.khdatabase.com/Kairi',
        'https://www.khdatabase.com/King_Mickey_Mouse',
        'https://www.khdatabase.com/Lava_Titan',
        'https://www.khdatabase.com/Leon',
        'https://www.khdatabase.com/Maleficent',
        'https://www.khdatabase.com/Merlin',
        'https://www.khdatabase.com/Moogle',
        'https://www.khdatabase.com/Philoctetes',
        'https://www.khdatabase.com/Queen_Minnie_Mouse',
        'https://www.khdatabase.com/Riku',
        'https://www.khdatabase.com/Rock_Titan',
        'https://www.khdatabase.com/Simba',
        'https://www.khdatabase.com/Sora',
        'https://www.khdatabase.com/Tornado_Titan',
        'https://www.khdatabase.com/Ursula',
    )

    browser = configure_driver()
    collected = []
    try:
        for page_url in character_urls:
            # Each page yields a list of strings; keep one flat list overall.
            collected += scrape_character_page(browser, page_url)
    finally:
        # Shut the browser down whether or not scraping succeeded.
        browser.quit()

    # Serialize the whole list in one go and write it out.
    with open('quotes.json', 'w') as out_file:
        out_file.write(json.dumps(collected))


# Entry-point guard: call main() only when this file is executed directly
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()


# def configure_driver():
#     driver = webdriver.Chrome()
#     return driver


# def get_character_urls(driver, url):
#     driver.get(url)
#     character_links = driver.find_elements(By.CSS_SELECTOR, "div.mw-category-group ul li a")
#     urls = [link.get_attribute('href') for link in character_links]

#     return urls

# def main():
#     characters_list_page = "https://www.khdatabase.com/Category:Kingdom_Hearts_characters"

#     driver = configure_driver()

#     try:
#         character_urls = get_character_urls(driver, characters_list_page)
#         for url in character_urls:
#             print(url)
#     finally:
#         driver.quit()

# if __name__ == "__main__":
#     main()

# #elements = driver.find_elements(By.TAG_NAME, "blockquote")  

# #for element in elements:
# #    print(element.text)

# #driver.quit()
first upload uploaded api, scraper, as well as KH quotes scraped from websites 2023-12-28 01:30:27 +00:00			`from selenium import webdriver`
			`from selenium.webdriver.common.by import By`
			`import json  # standard library; used to write the scraped quotes to quotes.json`

			`# Function to configure and return a WebDriver instance`
			`def configure_driver():`
			`# Configure the driver (e.g., using Chrome)`
			`driver = webdriver.Chrome()`
			`return driver`

			`# Function to scrape blockquote texts from a given character page`
			`def scrape_character_page(driver, url):`
			`# Navigate to the character page`
			`driver.get(url)`

			`# Find all blockquote elements on the page`
			`blockquotes = driver.find_elements(By.TAG_NAME, "blockquote")`
			`# Return a list of texts from each blockquote element`
			`return [blockquote.text for blockquote in blockquotes]`

			`def main():`
			`# List of character page URLs to be scraped`
			`character_urls = [`
			`'https://www.khdatabase.com/Ansem',`
			`'https://www.khdatabase.com/Ariel',`
			`'https://www.khdatabase.com/Cloud',`
			`'https://www.khdatabase.com/Daisy_Duck',`
			`'https://www.khdatabase.com/Donald_Duck',`
			`'https://www.khdatabase.com/Goofy',`
			`'https://www.khdatabase.com/Hades',`
			`'https://www.khdatabase.com/Hercules',`
			`'https://www.khdatabase.com/Ice_Titan',`
			`'https://www.khdatabase.com/Jiminy_Cricket',`
			`'https://www.khdatabase.com/Kairi',`
			`'https://www.khdatabase.com/King_Mickey_Mouse',`
			`'https://www.khdatabase.com/Lava_Titan',`
			`'https://www.khdatabase.com/Leon',`
			`'https://www.khdatabase.com/Maleficent',`
			`'https://www.khdatabase.com/Merlin',`
			`'https://www.khdatabase.com/Moogle',`
			`'https://www.khdatabase.com/Philoctetes',`
			`'https://www.khdatabase.com/Queen_Minnie_Mouse',`
			`'https://www.khdatabase.com/Riku',`
			`'https://www.khdatabase.com/Rock_Titan',`
			`'https://www.khdatabase.com/Simba',`
			`'https://www.khdatabase.com/Sora',`
			`'https://www.khdatabase.com/Tornado_Titan',`
			`'https://www.khdatabase.com/Ursula',`
			`]`

			`# Configure the WebDriver`
			`driver = configure_driver()`

			`# Initialize an empty list to store all scraped quotes`
			`all_quotes = []`
			`try:`
			`# Iterate over each URL in the character_urls list`
			`for url in character_urls:`
			`# Scrape blockquote texts from the current URL`
			`quotes = scrape_character_page(driver, url)`
			`# Add the scraped quotes to the all_quotes list`
			`all_quotes.extend(quotes)`
			`finally:`
			`# Close the WebDriver once scraping is done or if an error occurs`
			`driver.quit()`

			`# Write the collected quotes to a JSON file`
			`with open('quotes.json', 'w') as file:`
			`# Convert the list of quotes to JSON format and save it`
			`json.dump(all_quotes, file)`


			`# Python's way to check if this script is being run as the main program`
			`if __name__ == "__main__":`
			`main()`



























































































			`# def configure_driver():`
			`# driver = webdriver.Chrome()`
			`# return driver`


			`# def get_character_urls(driver, url):`
			`# driver.get(url)`
			`# character_links = driver.find_elements(By.CSS_SELECTOR, "div.mw-category-group ul li a")`
			`# urls = [link.get_attribute('href') for link in character_links]`

			`# return urls`

			`# def main():`
			`# characters_list_page = "https://www.khdatabase.com/Category:Kingdom_Hearts_characters"`

			`# driver = configure_driver()`

			`# try:`
			`# character_urls = get_character_urls(driver, characters_list_page)`
			`# for url in character_urls:`
			`# print(url)`
			`# finally:`
			`# driver.quit()`

			`# if __name__ == "__main__":`
			`# main()`

			`# #elements = driver.find_elements(By.TAG_NAME, "blockquote")`

			`# #for element in elements:`
			`# # print(element.text)`

			`# #driver.quit()`