from selenium import webdriver from selenium.webdriver.common.by import By import json #the first few of lines import different selenium libraries # Function to configure and return a WebDriver instance def configure_driver(): # Configure the driver (e.g., using Chrome) driver = webdriver.Chrome() return driver # Function to scrape blockquote texts from a given character page def scrape_character_page(driver, url): # Navigate to the character page driver.get(url) # Find all blockquote elements and print their text blockquotes = driver.find_elements(By.TAG_NAME, "blockquote") # Return a list of texts from each blockquote element return [blockquote.text for blockquote in blockquotes] def main(): # List of character page URLs to be scraped character_urls = [ 'https://www.khdatabase.com/Ansem', 'https://www.khdatabase.com/Ariel', 'https://www.khdatabase.com/Cloud', 'https://www.khdatabase.com/Daisy_Duck', 'https://www.khdatabase.com/Donald_Duck', 'https://www.khdatabase.com/Goofy', 'https://www.khdatabase.com/Hades', 'https://www.khdatabase.com/Hercules', 'https://www.khdatabase.com/Ice_Titan', 'https://www.khdatabase.com/Jiminy_Cricket', 'https://www.khdatabase.com/Kairi', 'https://www.khdatabase.com/King_Mickey_Mouse', 'https://www.khdatabase.com/Lava_Titan', 'https://www.khdatabase.com/Leon', 'https://www.khdatabase.com/Maleficent', 'https://www.khdatabase.com/Merlin', 'https://www.khdatabase.com/Moogle', 'https://www.khdatabase.com/Philoctetes', 'https://www.khdatabase.com/Queen_Minnie_Mouse', 'https://www.khdatabase.com/Riku', 'https://www.khdatabase.com/Rock_Titan', 'https://www.khdatabase.com/Simba', 'https://www.khdatabase.com/Sora', 'https://www.khdatabase.com/Tornado_Titan', 'https://www.khdatabase.com/Ursula', ] # Configure the WebDriver driver = configure_driver() # Initialize an empty list to store all scraped quotes all_quotes = [] try: # Iterate over each URL in the character_urls list for url in character_urls: # Scrape blockquote texts from the current URL quotes = scrape_character_page(driver, url) # Add the scraped quotes to the all_quotes list all_quotes.extend(quotes) finally: # Close the WebDriver once scraping is done or if an error occurs driver.quit() # Write the collected quotes to a JSON file with open('quotes.json', 'w') as file: # Convert the list of quotes to JSON format and save i json.dump(all_quotes, file) # Python's way to check if this script is being run as the main program if __name__ == "__main__": main() # def configure_driver(): # driver = webdriver.Chrome() # return driver # def get_character_urls(driver, url): # driver.get(url) # character_links = driver.find_elements(By.CSS_SELECTOR, "div.mw-category-group ul li a") # urls = [link.get_attribute('href') for link in character_links] # return urls # def main(): # characters_list_page = "https://www.khdatabase.com/Category:Kingdom_Hearts_characters" # driver = configure_driver() # try: # character_urls = get_character_urls(driver, characters_list_page) # for url in character_urls: # print(url) # finally: # driver.quit() # if __name__ == "__main__": # main() # #elements = driver.find_elements(By.TAG_NAME, "blockquote") # #for element in elements: # # print(element.text) # #driver.quit()