# RMO-Front/convert_to_word.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
将Markdown文件转换为Word文档
"""

import re
from docx import Document
from docx.shared import Pt, RGBColor, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn

# Font sizes (pt) applied to Markdown heading levels 1-4.
_HEADING_SIZES = {1: 20, 2: 18, 3: 16, 4: 14}
# One to four '#' characters, exactly one space, then the heading text.
_HEADING_RE = re.compile(r'(#{1,4}) (.*)')
# Ordered-list prefix such as "1. ".
_ORDERED_ITEM_RE = re.compile(r'\d+\.\s')
# Inline spans: **bold** (group 1) or `code` (group 2).
_INLINE_RE = re.compile(r'\*\*(.*?)\*\*|`(.*?)`')


def _set_chinese_font(run):
    """Apply the 宋体 (SimSun) typeface to *run*."""
    run.font.name = '宋体'
    # Also set the w:eastAsia attribute on the run's rFonts element so the
    # same typeface is used for East Asian text.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')


def _set_title_font(run, size=16, bold=True):
    """Style a heading run: given size/weight, black colour, Chinese font."""
    run.font.size = Pt(size)
    run.font.bold = bold
    run.font.color.rgb = RGBColor(0, 0, 0)
    _set_chinese_font(run)


def _set_normal_font(run, size=12):
    """Style a body-text run: given size, non-bold, Chinese font."""
    run.font.size = Pt(size)
    run.font.bold = False
    _set_chinese_font(run)


def _add_inline_runs(paragraph, text):
    """Append *text* to *paragraph*, rendering **bold** and `code` spans.

    Only complete spans matched by ``_INLINE_RE`` are treated as markup;
    stray markers (for example a lone ``**``) are kept as literal text
    instead of being dropped.
    """
    cursor = 0
    for match in _INLINE_RE.finditer(text):
        if match.start() > cursor:
            _set_normal_font(paragraph.add_run(text[cursor:match.start()]))

        bold_text, code_text = match.groups()
        if bold_text is not None:
            run = paragraph.add_run(bold_text)
            _set_normal_font(run)
            run.bold = True
        else:
            run = paragraph.add_run(code_text)
            _set_normal_font(run)
            # Override the Latin typeface after the base styling.
            run.font.name = 'Courier New'
        cursor = match.end()

    if cursor < len(text):
        _set_normal_font(paragraph.add_run(text[cursor:]))


def parse_markdown_to_word(md_file, docx_file) -> None:
    """Convert a Markdown file into a Word (.docx) document.

    Supported constructs, recognised line by line after stripping
    surrounding whitespace:

    * blank lines        -> empty paragraphs
    * ``#`` .. ``####``   -> headings of level 1-4 (20/18/16/14 pt)
    * ``- `` / ``* ``     -> 'List Bullet' paragraphs
    * ``1. `` etc.        -> 'List Number' paragraphs
    * anything else      -> normal paragraphs

    ``**bold**`` and ```code``` spans are rendered in list items and
    normal paragraphs alike. Fenced code blocks (```` ``` ````) are skipped
    entirely, including their content; an unclosed fence skips the rest
    of the file.

    Args:
        md_file: Path of the UTF-8 encoded Markdown file to read.
        docx_file: Path the resulting document is saved to.

    Raises:
        OSError: If ``md_file`` cannot be opened (e.g. it does not exist).
    """
    with open(md_file, 'r', encoding='utf-8') as f:
        content = f.read()

    doc = Document()

    # A shared iterator lets the fenced-code branch consume lines itself.
    lines = iter(content.split('\n'))
    for raw_line in lines:
        line = raw_line.strip()

        if not line:
            doc.add_paragraph()
            continue

        heading = _HEADING_RE.match(line)
        if heading:
            level = len(heading.group(1))
            p = doc.add_heading(heading.group(2), level=level)
            for run in p.runs:
                _set_title_font(run, _HEADING_SIZES[level])
            continue

        # Fenced code block: deliberately dropped, up to and including the
        # closing fence.
        if line.startswith('```'):
            for fenced_line in lines:
                if fenced_line.strip().startswith('```'):
                    break
            continue

        if line.startswith(('- ', '* ')):
            p = doc.add_paragraph(style='List Bullet')
            _add_inline_runs(p, line[2:].strip())
            continue

        ordered = _ORDERED_ITEM_RE.match(line)
        if ordered:
            p = doc.add_paragraph(style='List Number')
            _add_inline_runs(p, line[ordered.end():])
            continue

        _add_inline_runs(doc.add_paragraph(), line)

    doc.save(docx_file)
    print(f"成功将 {md_file} 转换为 {docx_file}")

if __name__ == '__main__':
    # Script entry point.
    # Usage: convert_to_word.py [input.md [output.docx]]
    # With no arguments the original default file names are used; when only
    # the input is given, the output name is the input with a .docx suffix.
    import os
    import sys

    cli_args = sys.argv[1:]
    md_file = cli_args[0] if cli_args else 'RMO网站需求文档.md'
    if len(cli_args) > 1:
        docx_file = cli_args[1]
    else:
        docx_file = os.path.splitext(md_file)[0] + '.docx'

    try:
        parse_markdown_to_word(md_file, docx_file)
        print(f"\n转换完成！文件已保存为: {docx_file}")
    except ImportError:
        # NOTE: only reached if an import fails during conversion. The
        # module-level `docx` imports at the top of the file run before this
        # block, so a missing python-docx fails there instead.
        print("错误: 需要安装 python-docx 库")
        print("请运行: pip install python-docx")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any conversion failure and exit non-zero.
        print(f"转换过程中出现错误: {e}")
        sys.exit(1)