before changing links
This commit is contained in:
76
extract_kpme_hospitals_brave.py
Normal file
76
extract_kpme_hospitals_brave.py
Normal file
@ -0,0 +1,76 @@
|
||||
import time
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
import pandas as pd
|
||||
|
||||
def scrape_hospital_data(url):
    """Scrape the hospital listing table from the KPME portal.

    Walks every page of the paginated results table at *url*, collecting
    one record per establishment, then writes the combined data to
    'kpme_hospitals.csv' in the current directory.

    Args:
        url: Address of the KPME application-list page to scrape.

    Side effects:
        Launches a Chrome browser, writes 'kpme_hospitals.csv', prints a
        confirmation message.
    """
    # Initialize the WebDriver (webdriver_manager downloads a matching
    # chromedriver binary on first use).
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    # One [medicine, category, name, address, validity, cert_no] list per row.
    all_data = []

    try:
        driver.get(url)

        while True:
            # Fixed wait for the table to (re)load after navigation.
            # NOTE(review): a WebDriverWait on the table element would be
            # more robust than a hard-coded sleep — confirm page timing.
            time.sleep(2)

            table = driver.find_element(By.ID, 'ContentPlaceHolder1_gvw_list')
            rows = table.find_elements(By.TAG_NAME, 'tr')[1:]  # Skip the header row.

            for row in rows:
                cols = row.find_elements(By.TAG_NAME, 'td')
                if len(cols) <= 5:
                    # Pager/footer rows lack the data columns — skip them.
                    continue

                # The certificate number is normally rendered as a link;
                # fall back to the raw cell text when no anchor is present
                # instead of crashing with NoSuchElementException.
                anchors = cols[5].find_elements(By.TAG_NAME, 'a')
                certificate_number = anchors[0].text if anchors else cols[5].text

                all_data.append([
                    cols[0].text,          # System of medicine
                    cols[1].text,          # Category
                    cols[2].text,          # Establishment name
                    cols[3].text,          # Address
                    cols[4].text,          # Certificate validity
                    certificate_number,
                ])

            # Advance to the next page, if any.
            pagination = driver.find_elements(By.CLASS_NAME, 'pagination')
            if not pagination:
                break
            next_page_link = pagination[0].find_elements(By.TAG_NAME, 'a')[-1]  # Assume the last link is 'Next'.
            if 'Next' not in next_page_link.text:
                break
            next_page_link.click()
    finally:
        # Always release the browser, even when scraping raised — the
        # original leaked the Chrome process on any exception.
        driver.quit()

    # Convert the data to a DataFrame and save to CSV.
    df = pd.DataFrame(all_data, columns=[
        'System of Medicine',
        'Category',
        'Establishment Name',
        'Address',
        'Certificate Validity',
        'Certificate Number'
    ])
    df.to_csv('kpme_hospitals.csv', index=False)
    print("Data saved to kpme_hospitals.csv")
|
||||
|
||||
# URL of the KPME portal
url = 'https://kpme.karnataka.gov.in/AllapplicationList.aspx'

if __name__ == "__main__":
    # Guard the scrape behind the script entry point so importing this
    # module does not launch a browser as a side effect.
    scrape_hospital_data(url)
|
||||
Reference in New Issue
Block a user