before changing links

This commit is contained in:
govardhan
2025-06-19 09:01:18 +05:30
commit 6686208bf1
1277 changed files with 29692 additions and 0 deletions

38
clean_md_files.py Normal file
View File

@ -0,0 +1,38 @@
import os
import re
def _clean_markdown_text(content):
    """Return *content* with the leading header and trailing navigation removed.

    Steps, in order:
      1. Drop everything up to and including the first ``---`` (if present).
      2. Drop everything from the first line starting with ``[Previous]``.
      3. Drop the trailing run of ``* [text]...`` bullet-link lines.
      4. Collapse runs of three or more newlines into a single blank line.
      5. Strip leading/trailing whitespace.
    """
    # Equivalent to re.sub(r'^.*?---', '', ..., DOTALL): keep only what
    # follows the first "---"; leave the text untouched when there is none.
    _, marker, remainder = content.partition('---')
    if marker:
        content = remainder

    # Equivalent to re.sub(r'\n\[Previous\].*$', '', ..., DOTALL): cut the
    # navigation footer starting at the first line that begins "[Previous]".
    content = content.partition('\n[Previous]')[0]

    # Remove bullet-link navigation only where it forms the tail of the
    # document.  Matching from the first bullet link anywhere to the end of
    # the file would also delete body content that follows an in-text list.
    # The first line is never removed (the pattern always needed a preceding
    # newline).
    lines = content.split('\n')
    while len(lines) > 1 and (
        not lines[-1].strip() or re.match(r'\* \[.*?\].*', lines[-1])
    ):
        lines.pop()
    content = '\n'.join(lines)

    # Clean up multiple blank lines.
    content = re.sub(r'\n{3,}', '\n\n', content)

    return content.strip()


def clean_md_file(file_path):
    """Clean the markdown file at *file_path* in place.

    The file is read as UTF-8, passed through ``_clean_markdown_text`` and
    written back as UTF-8 to the same path.  The written text has no leading
    or trailing whitespace (so no final newline).

    Args:
        file_path: Path of the markdown file to rewrite.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    cleaned = _clean_markdown_text(content)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(cleaned)
def process_directory(dir_path):
    """Clean every ``.md`` file found under *dir_path*, recursively.

    Walks the tree with ``os.walk``; for each file whose name ends in
    ``.md`` it prints a progress line and calls ``clean_md_file`` on it,
    which rewrites the file in place.

    Args:
        dir_path: Root directory to walk.
    """
    for current_dir, _subdirs, filenames in os.walk(dir_path):
        # Only markdown files are touched; everything else is skipped.
        markdown_names = (name for name in filenames if name.endswith('.md'))
        for name in markdown_names:
            md_path = os.path.join(current_dir, name)
            print(f"Processing: {md_path}")
            clean_md_file(md_path)
if __name__ == "__main__":
    import sys

    # The directory to clean may be given as the first command-line argument;
    # without one, fall back to the original hard-coded location.
    docs_dir = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "/Users/dhanraj/Desktop/kpme_scraper/docs"
    )
    # os.walk yields nothing for a missing directory, which would otherwise
    # report success without having processed a single file.
    if not os.path.isdir(docs_dir):
        sys.exit(f"Not a directory: {docs_dir}")
    process_directory(docs_dir)
    print("Completed cleaning markdown files.")