RMO-Front/convert_to_word.py

167 lines
4.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
将Markdown文件转换为Word文档
"""
import re
from docx import Document
from docx.shared import Pt, RGBColor, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
def parse_markdown_to_word(md_file, docx_file):
    """Convert a Markdown file into a Word (.docx) document.

    Only a small Markdown subset is understood, processed line by line:

    * ``#`` .. ``####`` headings (levels 1-4, sized 20/18/16/14 pt);
    * bullet items starting with ``- `` or ``* ``;
    * ordered items starting with ``<digits>. ``;
    * inline ``**bold**`` and `` `code` `` spans in paragraphs and list items;
    * blank lines, which become empty paragraphs.

    Fenced code blocks (delimited by lines starting with three backticks) are
    skipped entirely; an unterminated fence therefore swallows the rest of
    the input. Every line is stripped first, so indentation is ignored.

    Args:
        md_file: Path of the UTF-8 Markdown file to read (a leading BOM is
            tolerated).
        docx_file: Path the resulting document is saved to.
    """
    # Point size used for each supported heading level.
    heading_sizes = {1: 20, 2: 18, 3: 16, 4: 14}

    # Patterns are compiled once instead of on every line.
    heading_prefix = re.compile(r'(#{1,4}) ')
    ordered_prefix = re.compile(r'\d+\.\s')
    inline_markup = re.compile(r'(\*\*.*?\*\*|`.*?`)')

    def set_chinese_font(run):
        """Use SimSun for the run, including its East Asian font slot."""
        run.font.name = '宋体'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    def set_title_font(run, size=16, bold=True):
        """Apply the heading look: black text at the given point size."""
        run.font.size = Pt(size)
        run.font.bold = bold
        run.font.color.rgb = RGBColor(0, 0, 0)
        set_chinese_font(run)

    def set_normal_font(run, size=12):
        """Apply the body-text look: non-bold text at the given point size."""
        run.font.size = Pt(size)
        run.font.bold = False
        set_chinese_font(run)

    def add_inline_runs(paragraph, text):
        """Append ``text`` to ``paragraph``, rendering bold and code spans."""
        # re.split with one capturing group alternates plain text (even
        # indexes) and matched markup (odd indexes). Using the index avoids
        # mistaking a plain fragment such as a lone "**" for markup.
        for index, part in enumerate(inline_markup.split(text)):
            if not part:
                continue
            is_markup = index % 2 == 1
            if is_markup and part.startswith('**'):
                run = paragraph.add_run(part[2:-2])
                set_normal_font(run)
                run.bold = True
            elif is_markup:
                run = paragraph.add_run(part[1:-1])
                set_normal_font(run)
                # Overrides only the Latin font; East Asian stays SimSun.
                run.font.name = 'Courier New'
            else:
                run = paragraph.add_run(part)
                set_normal_font(run)

    # 'utf-8-sig' decodes plain UTF-8 as well, but also drops a leading BOM
    # that would otherwise hide a heading marker on the first line.
    with open(md_file, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    doc = Document()

    # A shared iterator lets the code-fence branch consume lines in place.
    lines = iter(content.split('\n'))
    for raw_line in lines:
        line = raw_line.strip()

        if not line:
            doc.add_paragraph()
            continue

        heading = heading_prefix.match(line)
        if heading:
            level = len(heading.group(1))
            p = doc.add_heading(line[heading.end():], level=level)
            for run in p.runs:
                set_title_font(run, heading_sizes[level])
            continue

        # Code blocks are dropped: skip up to and including the closing fence.
        if line.startswith('```'):
            for fenced_line in lines:
                if fenced_line.strip().startswith('```'):
                    break
            continue

        if line.startswith(('- ', '* ')):
            p = doc.add_paragraph(style='List Bullet')
            add_inline_runs(p, line[2:].strip())
            continue

        ordered = ordered_prefix.match(line)
        if ordered:
            p = doc.add_paragraph(style='List Number')
            add_inline_runs(p, line[ordered.end():])
            continue

        # Anything else is an ordinary paragraph.
        add_inline_runs(doc.add_paragraph(), line)

    doc.save(docx_file)
    print(f"成功将 {md_file} 转换为 {docx_file}")
if __name__ == '__main__':
    # Usage: convert_to_word.py [markdown_file [docx_file]]
    # With no arguments the original hard-coded file names are used. When
    # only the input is given, the output name is the input name with its
    # extension replaced by ".docx".
    import os
    import sys

    cli_args = sys.argv[1:]
    md_file = cli_args[0] if cli_args else 'RMO网站需求文档.md'
    if len(cli_args) > 1:
        docx_file = cli_args[1]
    else:
        docx_file = os.path.splitext(md_file)[0] + '.docx'

    try:
        parse_markdown_to_word(md_file, docx_file)
        print(f"\n转换完成!文件已保存为: {docx_file}")
    except ImportError:
        # NOTE(review): python-docx is imported at module level, so a missing
        # package fails before this guard runs; this handler only covers an
        # ImportError raised during the conversion itself.
        print("错误: 需要安装 python-docx 库", file=sys.stderr)
        print("请运行: pip install python-docx", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"转换过程中出现错误: {e}", file=sys.stderr)
        sys.exit(1)