import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


def scrape_hospital_data(url, output_csv='kpme_hospitals.csv'):
    """Scrape the paginated hospital listing table from the KPME portal.

    Walks every page of the ``ContentPlaceHolder1_gvw_list`` GridView,
    collecting one row per establishment, and writes the result to CSV.

    Args:
        url: Address of the KPME application-list page.
        output_csv: Path of the CSV file to write (default keeps the
            original behavior: ``kpme_hospitals.csv``).

    Side effects:
        Launches a Chrome browser, performs network I/O, and writes
        ``output_csv`` to the current directory.
    """
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    all_data = []
    try:
        driver.get(url)
        wait = WebDriverWait(driver, 15)

        while True:
            # Wait for the results table instead of a blind sleep.
            table = wait.until(
                EC.presence_of_element_located(
                    (By.ID, 'ContentPlaceHolder1_gvw_list')
                )
            )

            # Skip the header row.
            for row in table.find_elements(By.TAG_NAME, 'tr')[1:]:
                cols = row.find_elements(By.TAG_NAME, 'td')
                # Pagination/footer rows have fewer cells; ignore them.
                if len(cols) > 5:
                    all_data.append([
                        cols[0].text,                                    # System of Medicine
                        cols[1].text,                                    # Category
                        cols[2].text,                                    # Establishment Name
                        cols[3].text,                                    # Address
                        cols[4].text,                                    # Certificate Validity
                        cols[5].find_element(By.TAG_NAME, 'a').text,     # Certificate Number
                    ])

            # Advance to the next page, if a 'Next' link exists.
            pagination = driver.find_elements(By.CLASS_NAME, 'pagination')
            if not pagination:
                break
            links = pagination[0].find_elements(By.TAG_NAME, 'a')
            # NOTE(review): assumes the last pagination link is 'Next'
            # on this portal — confirm against the live markup.
            if not links or 'Next' not in links[-1].text:
                break

            links[-1].click()
            try:
                # Block until the old table goes stale, i.e. the ASP.NET
                # postback has actually replaced the page. Re-reading
                # immediately (as with a fixed sleep) can scrape the same
                # page twice or raise StaleElementReference.
                wait.until(EC.staleness_of(table))
            except TimeoutException:
                # Page never reloaded — 'Next' was present but inert
                # (e.g. disabled on the last page). Stop rather than
                # loop on the same page forever.
                break
    finally:
        # Always release the browser process, even if scraping failed.
        driver.quit()

    df = pd.DataFrame(all_data, columns=[
        'System of Medicine', 'Category', 'Establishment Name',
        'Address', 'Certificate Validity', 'Certificate Number',
    ])
    df.to_csv(output_csv, index=False)
    print(f"Data saved to {output_csv}")


if __name__ == "__main__":
    # URL of the KPME portal
    url = 'https://kpme.karnataka.gov.in/AllapplicationList.aspx'
    scrape_hospital_data(url)