"""Clean scraped Markdown files in place.

For every ``.md`` file under a directory tree this script removes the
leading header (everything up to the first ``---``), the navigation links
at the bottom of the page, and runs of extra blank lines.
"""

import os
import re
import sys

# Directory processed when no path is given on the command line.
DEFAULT_DOCS_DIR = "/Users/dhanraj/Desktop/kpme_scraper/docs"

# A run of top-level "* [label]..." bullet lines (optionally separated by
# empty lines) that reaches the very end of the text.  Anchoring on \Z is
# what keeps link lists inside the document body from being touched.
_TRAILING_NAV_RE = re.compile(r'(?:\n+\* \[[^\]\n]*\][^\n]*)+\s*\Z')

# Three or more consecutive newlines, i.e. two or more blank lines in a row.
_EXTRA_BLANK_LINES_RE = re.compile(r'\n{3,}')


def _clean_markdown_text(content):
    """Return *content* without its header, bottom navigation and extra blank lines."""
    # Drop everything up to and including the first "---", if there is one.
    # NOTE: this is the first "---" anywhere in the text (a table separator
    # or horizontal rule counts too), so running the cleaner again on an
    # already cleaned file can remove real content.
    _, marker, remainder = content.partition("---")
    if marker:
        content = remainder

    # Drop the first line that starts with "[Previous]" and everything after it.
    content = content.partition("\n[Previous]")[0]

    # Drop bullet-style link lines only when they form the tail of the text.
    # The previous pattern cut the document at the first "* [" bullet found
    # anywhere, deleting all body content that followed it.
    content = _TRAILING_NAV_RE.sub("", content)

    # Collapse multiple blank lines into a single blank line.
    content = _EXTRA_BLANK_LINES_RE.sub("\n\n", content)

    # Trim leading/trailing whitespace; the result has no trailing newline.
    return content.strip()


def clean_md_file(file_path):
    """Clean the Markdown file at *file_path* in place.

    The file is read and written as UTF-8.  It is only rewritten when the
    cleaning actually changed its text.

    Args:
        file_path: Path of the Markdown file to clean.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        original = f.read()

    cleaned = _clean_markdown_text(original)

    # Avoid a pointless write when there was nothing to clean.
    if cleaned != original:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(cleaned)


def process_directory(dir_path):
    """Clean every file whose name ends with ``.md`` under *dir_path*, recursively.

    Prints one ``Processing: <path>`` line per file.  A *dir_path* that does
    not exist yields no files, so nothing is processed and nothing is raised.

    Args:
        dir_path: Root directory to walk.
    """
    for root, _dirs, files in os.walk(dir_path):
        for filename in files:
            if filename.endswith('.md'):
                file_path = os.path.join(root, filename)
                print(f"Processing: {file_path}")
                clean_md_file(file_path)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default directory.
    docs_dir = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DOCS_DIR
    if not os.path.isdir(docs_dir):
        # os.walk would silently do nothing here; report the problem instead.
        sys.exit(f"Not a directory: {docs_dir}")
    process_directory(docs_dir)
    print("Completed cleaning markdown files.")