76 lines
2.5 KiB
Python
76 lines
2.5 KiB
Python
import time
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.keys import Keys
|
|
from selenium.webdriver.chrome.service import Service
|
|
from webdriver_manager.chrome import ChromeDriverManager
|
|
import pandas as pd
|
|
|
|
def scrape_hospital_data(url, *, output_path='kpme_hospitals.csv', page_load_delay=2):
    """Scrape the paginated establishment table at *url* and save it as CSV.

    Opens *url* in a Chrome session, reads every data row of the table with
    ID ``ContentPlaceHolder1_gvw_list`` on each page, follows the trailing
    'Next' pagination link until there is none, and writes the collected
    rows to *output_path*.

    Args:
        url: Address of the page that hosts the table.
        output_path: Destination CSV file. Defaults to 'kpme_hospitals.csv'.
        page_load_delay: Seconds to pause before reading each page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the table is
            not present on a page after the pause.
    """
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    all_data = []
    try:
        driver.get(url)

        while True:
            # Fixed pause instead of an explicit wait: gives the table time
            # to render after the initial load and after each 'Next' click.
            time.sleep(page_load_delay)

            table = driver.find_element(By.ID, 'ContentPlaceHolder1_gvw_list')
            rows = table.find_elements(By.TAG_NAME, 'tr')[1:]  # Skip the header row

            for row in rows:
                cols = row.find_elements(By.TAG_NAME, 'td')
                if len(cols) <= 5:
                    # Not a data row (too few cells); ignore it.
                    continue

                # The certificate number is normally the text of a link in
                # the sixth cell; fall back to the cell text when the link
                # is absent rather than aborting the whole scrape.
                links = cols[5].find_elements(By.TAG_NAME, 'a')
                certificate_number = links[0].text if links else cols[5].text

                # Column order: system of medicine, category, establishment
                # name, address, certificate validity, certificate number.
                all_data.append(
                    [cell.text for cell in cols[:5]] + [certificate_number]
                )

            pagination = driver.find_elements(By.CLASS_NAME, 'pagination')
            if not pagination:
                break

            # Assume the last link in the pagination bar is 'Next'.
            # NOTE(review): if the site keeps showing 'Next' on the final
            # page this loop will not terminate -- confirm against the site.
            page_links = pagination[0].find_elements(By.TAG_NAME, 'a')
            if not page_links or 'Next' not in page_links[-1].text:
                break
            page_links[-1].click()
    finally:
        # Always release the browser, even when scraping fails part-way.
        driver.quit()

    # Convert the data to a DataFrame and save to CSV
    df = pd.DataFrame(all_data, columns=[
        'System of Medicine',
        'Category',
        'Establishment Name',
        'Address',
        'Certificate Validity',
        'Certificate Number',
    ])
    df.to_csv(output_path, index=False)
    print(f"Data saved to {output_path}")
|
|
|
|
# URL of the KPME portal
url = 'https://kpme.karnataka.gov.in/AllapplicationList.aspx'

# Run the scrape only when this file is executed as a script, so that
# importing the module does not start a scrape as a side effect.
if __name__ == "__main__":
    scrape_hospital_data(url)