before changing links

This commit is contained in:
govardhan
2025-06-19 09:01:18 +05:30
commit 6686208bf1
1277 changed files with 29692 additions and 0 deletions

View File

@ -0,0 +1,76 @@
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
def scrape_hospital_data(url, output_path='kpme_hospitals.csv'):
    """Scrape the paginated establishment table at *url* and save it as CSV.

    A Chrome WebDriver is started, the page is opened, and every data row of
    the table with element ID ``ContentPlaceHolder1_gvw_list`` is collected.
    The scraper then follows the last link inside the first ``pagination``
    element for as long as that link's text contains ``Next``.

    Args:
        url: Address of the page that holds the table.
        output_path: Destination of the CSV file. Defaults to
            ``kpme_hospitals.csv`` in the current working directory.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the table is
            not present on a page. The browser is still shut down, and no
            CSV is written.
    """
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    # One six-item list per scraped table row.
    all_data = []

    try:
        driver.get(url)

        while True:
            # Fixed pause that gives the table time to (re)load after the
            # initial navigation or a click on the "Next" link.
            time.sleep(2)

            table = driver.find_element(By.ID, 'ContentPlaceHolder1_gvw_list')
            rows = table.find_elements(By.TAG_NAME, 'tr')[1:]  # Skip the header row

            for row in rows:
                cols = row.find_elements(By.TAG_NAME, 'td')
                if len(cols) <= 5:
                    # Not a data row (too few cells); nothing to record.
                    continue

                # The certificate number is normally the text of a link in
                # the sixth cell; fall back to the cell's own text when no
                # link is present instead of aborting the whole scrape.
                links = cols[5].find_elements(By.TAG_NAME, 'a')
                certificate_number = links[0].text if links else cols[5].text

                all_data.append(
                    [cell.text for cell in cols[:5]] + [certificate_number]
                )

            pagination = driver.find_elements(By.CLASS_NAME, 'pagination')
            if not pagination:
                break

            # The last link of the pager is assumed to be "Next"; an empty
            # pager or a different last link means this was the final page.
            page_links = pagination[0].find_elements(By.TAG_NAME, 'a')
            if not page_links or 'Next' not in page_links[-1].text:
                break
            page_links[-1].click()
    finally:
        # Always shut the browser down, even when scraping fails part-way,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()

    df = pd.DataFrame(all_data, columns=[
        'System of Medicine',
        'Category',
        'Establishment Name',
        'Address',
        'Certificate Validity',
        'Certificate Number',
    ])
    df.to_csv(output_path, index=False)
    print(f"Data saved to {output_path}")
# URL of the KPME portal
url = 'https://kpme.karnataka.gov.in/AllapplicationList.aspx'

if __name__ == "__main__":
    # Scrape only when executed as a script, so importing this module does
    # not launch a browser as a side effect.
    scrape_hospital_data(url)