Data_Analysis/分析策略/md_to_docx_convert.py

58 lines
1.6 KiB
Python

from pathlib import Path
from docx import Document
def convert_md_to_docx(md_path: Path, docx_path: Path) -> None:
    """Convert a Markdown file into a .docx file using line-based rules.

    The Markdown text is read as UTF-8 and processed one line at a time;
    each input line becomes exactly one paragraph or heading:

    * A line whose stripped text starts with three backticks toggles
      code mode and is not emitted itself.
    * Lines inside a code fence are written verbatim as plain paragraphs.
    * Blank lines become empty paragraphs.
    * ``#`` through ``######`` followed by a space become headings of
      level 1-6 (the marker must start at column 0).
    * ``- item`` (leading indentation allowed) becomes a "List Bullet"
      paragraph.
    * ``N. item`` with a 1-9 digit number (leading indentation allowed)
      becomes a "List Number" paragraph.
    * Everything else, including Markdown table rows, is kept as a plain
      text paragraph.

    Inline markup (bold, links, ...) is not interpreted, and list nesting
    is flattened to a single level.

    Args:
        md_path: Path of the Markdown file to read.
        docx_path: Path of the .docx file to write.
    """
    doc = Document()
    in_code = False

    # splitlines() already removes line terminators, so no extra rstrip
    # is required on each line.
    for line in md_path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()

        # Fence markers only flip the state; they produce no output.
        if stripped.startswith("```"):
            in_code = not in_code
            continue

        if in_code:
            # Keep the untouched line so indentation inside code survives.
            doc.add_paragraph().add_run(line)
            continue

        if not stripped:
            doc.add_paragraph("")
            continue

        # Heading: count the leading '#' characters and require a space
        # right after them. str.startswith with a start offset is safe
        # even when the line consists of '#' characters only.
        level = len(line) - len(line.lstrip("#"))
        if 1 <= level <= 6 and line.startswith(" ", level):
            doc.add_heading(line[level + 1:].strip(), level=level)
            continue

        # List markers are matched after dropping leading whitespace so
        # that bullets and numbered items are treated the same way.
        body = line.lstrip()

        if body.startswith("- "):
            doc.add_paragraph(body[2:].strip(), style="List Bullet")
            continue

        # Ordered item: digits, then ". ". Accept multi-digit numbers
        # ("10. ...") but cap the length at 9 digits so that long numeric
        # prefixes are not mistaken for list markers.
        number, sep, rest = body.partition(". ")
        if sep and number.isdigit() and len(number) <= 9:
            doc.add_paragraph(rest.strip(), style="List Number")
            continue

        # Markdown table rows are preserved as plain text lines for
        # compatibility.
        doc.add_paragraph(line)

    doc.save(str(docx_path))
if __name__ == "__main__":
    # Script entry point: convert one fixed Markdown document and write
    # the resulting .docx next to it, then report the output location.
    markdown_file, word_file = (
        Path(r"d:\数据分析业务\ixa\药物警戒价值分析策略-正文.md"),
        Path(r"d:\数据分析业务\ixa\药物警戒价值分析策略-正文.docx"),
    )
    convert_md_to_docx(md_path=markdown_file, docx_path=word_file)
    print(f"Written: {word_file}")