before changing links
This commit is contained in:
38
clean_md_files.py
Normal file
38
clean_md_files.py
Normal file
@ -0,0 +1,38 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
def clean_md_file(file_path):
    """Strip scraped boilerplate from a Markdown file and rewrite it in place.

    The file is read as UTF-8, passed through the ordered substitutions
    below, stripped of leading/trailing whitespace, and written back to
    the same path.

    Args:
        file_path: Path of the Markdown file to clean. The file is
            overwritten with the cleaned text.

    Note:
        The two "navigation" patterns use DOTALL with a trailing ``.*$``,
        so each one discards everything from its first match through the
        end of the text, wherever in the file that first match occurs.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        text = source.read()

    # (pattern, replacement, flags), applied strictly in this order.
    # A pattern that does not match leaves the text unchanged.
    substitutions = (
        # Drop everything up to and including the first "---".
        (r'^.*?---', '', re.DOTALL),
        # Drop from the first line starting with "[Previous]" to the end.
        (r'\n\[Previous\].*$', '', re.DOTALL),
        # Drop from the first line starting with a "* [...]" bullet link
        # to the end.
        (r'\n\* \[.*?\].*$', '', re.DOTALL),
        # Collapse runs of three or more newlines into one blank line.
        (r'\n{3,}', '\n\n', 0),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)

    # Trim surrounding whitespace before writing the result back.
    cleaned = text.strip()

    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(cleaned)
|
||||
|
||||
def process_directory(dir_path):
    """Clean every ``.md`` file found under a directory tree.

    Walks ``dir_path`` recursively with ``os.walk``; for each file whose
    name ends in ``.md`` it prints the path and passes it to
    ``clean_md_file``. Files with any other suffix are left alone.

    Args:
        dir_path: Root directory to search.
    """
    for root, _subdirs, names in os.walk(dir_path):
        # Select the Markdown files in this directory, keeping walk order.
        markdown_names = [name for name in names if name.endswith('.md')]
        for name in markdown_names:
            file_path = os.path.join(root, name)
            print(f"Processing: {file_path}")
            clean_md_file(file_path)
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Script entry point: clean all Markdown files under a docs directory.
    # An optional first command-line argument selects the directory; with
    # no argument the original hard-coded location is used, so existing
    # invocations behave exactly as before.
    if len(sys.argv) > 1:
        docs_dir = sys.argv[1]
    else:
        docs_dir = "/Users/dhanraj/Desktop/kpme_scraper/docs"
    process_directory(docs_dir)
    print("Completed cleaning markdown files.")
|
||||
Reference in New Issue
Block a user