167 lines
4.6 KiB
Python
167 lines
4.6 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
将Markdown文件转换为Word文档
|
||
"""
|
||
|
||
import re
|
||
from docx import Document
|
||
from docx.shared import Pt, RGBColor, Inches
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.oxml.ns import qn
|
||
|
||
def parse_markdown_to_word(md_file, docx_file):
    """Convert a Markdown file into a Word (.docx) document.

    The input is processed line by line (each line stripped of surrounding
    whitespace) and mapped as follows:

    * blank line -> one empty paragraph;
    * ``# `` .. ``#### `` heading -> Word heading of level 1-4, bold, black,
      sized 20/18/16/14 pt;
    * ``- `` / ``* `` item -> paragraph with the ``List Bullet`` style;
    * ``1. ``-style item -> paragraph with the ``List Number`` style;
    * anything else -> a normal paragraph.

    Inline ``**bold**`` and backtick code spans are rendered as bold and
    'Courier New' runs in normal paragraphs and in list items alike. Fenced
    code blocks (delimited by lines starting with three backticks) are
    skipped: their content is NOT written to the output document.

    Args:
        md_file: Path of the Markdown file to read. Decoded as UTF-8; a
            leading byte-order mark is tolerated and discarded.
        docx_file: Path the generated document is saved to.

    Errors raised while reading the input or saving the document are not
    caught here and propagate to the caller.
    """
    # 'utf-8-sig' strips a UTF-8 BOM if present (and reads BOM-less files
    # unchanged); with plain 'utf-8' the BOM would stay attached to the first
    # line and stop a leading '# Title' from being recognised as a heading.
    with open(md_file, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    doc = Document()

    # Patterns are compiled once here instead of on every line.
    heading_re = re.compile(r'(#{1,4}) ')        # 1-4 hashes + one space
    ordered_re = re.compile(r'^\d+\.\s')         # "12. " list prefix
    inline_re = re.compile(r'(\*\*.*?\*\*|`.*?`)')  # **bold** or `code`

    # Font size in points for each supported heading level.
    heading_sizes = {1: 20, 2: 18, 3: 16, 4: 14}

    def set_chinese_font(run):
        """Apply the '宋体' font to *run*, including its East Asian slot."""
        run.font.name = '宋体'
        # Also write the w:eastAsia attribute on the run's XML directly so
        # the font is applied to East Asian (CJK) characters as well.
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    def set_title_font(run, size=16, bold=True):
        """Format a heading run: given size, bold flag, black colour."""
        run.font.size = Pt(size)
        run.font.bold = bold
        run.font.color.rgb = RGBColor(0, 0, 0)
        set_chinese_font(run)

    def set_normal_font(run, size=12):
        """Format a body-text run: given size, not bold."""
        run.font.size = Pt(size)
        run.font.bold = False
        set_chinese_font(run)

    def add_inline_runs(paragraph, text):
        """Append *text* to *paragraph*, rendering **bold** and `code` spans."""
        # re.split with a single capture group alternates plain text (even
        # positions) and matched spans (odd positions). Relying on position
        # rather than on startswith/endswith keeps literal text such as
        # '***' or a lone backtick from being mistaken for a formatted span.
        for position, part in enumerate(inline_re.split(text)):
            if not part:
                continue
            is_span = position % 2 == 1
            if is_span and part.startswith('**'):
                run = paragraph.add_run(part[2:-2])
                set_normal_font(run)
                run.bold = True
            elif is_span:
                run = paragraph.add_run(part[1:-1])
                set_normal_font(run)
                # Overrides the Latin font only; set after set_normal_font
                # so it is not reset by it.
                run.font.name = 'Courier New'
            else:
                run = paragraph.add_run(part)
                set_normal_font(run)

    # A shared iterator lets the code-fence branch consume the fenced lines.
    line_iter = iter(content.split('\n'))
    for raw_line in line_iter:
        line = raw_line.strip()

        # Blank line: keep the vertical spacing as an empty paragraph.
        if not line:
            doc.add_paragraph()
            continue

        # Headings, levels 1-4. Five or more hashes do not match and fall
        # through to the normal-paragraph branch.
        heading = heading_re.match(line)
        if heading:
            level = len(heading.group(1))
            p = doc.add_heading(line[heading.end():], level=level)
            for run in p.runs:
                set_title_font(run, heading_sizes[level])
            continue

        # Fenced code block: skipped on purpose. Consume lines up to and
        # including the closing fence (or to end of input if unclosed).
        if line.startswith('```'):
            for fenced_line in line_iter:
                if fenced_line.strip().startswith('```'):
                    break
            continue

        # Bulleted list item.
        if line.startswith(('- ', '* ')):
            p = doc.add_paragraph(style='List Bullet')
            add_inline_runs(p, line[2:].strip())
            continue

        # Numbered list item.
        ordered = ordered_re.match(line)
        if ordered:
            p = doc.add_paragraph(style='List Number')
            add_inline_runs(p, line[ordered.end():])
            continue

        # Normal paragraph.
        add_inline_runs(doc.add_paragraph(), line)

    doc.save(docx_file)
    print(f"成功将 {md_file} 转换为 {docx_file}")
|
||
|
||
if __name__ == '__main__':
    import os
    import sys

    # Usage: script.py [input.md [output.docx]]
    # With no arguments the original default file names are used, so
    # existing invocations behave exactly as before.
    cli_args = sys.argv[1:]
    md_file = cli_args[0] if cli_args else 'RMO网站需求文档.md'
    if len(cli_args) > 1:
        docx_file = cli_args[1]
    else:
        # Derive the output path from the input: same name, .docx extension.
        docx_file = os.path.splitext(md_file)[0] + '.docx'

    try:
        parse_markdown_to_word(md_file, docx_file)
        print(f"\n转换完成！文件已保存为: {docx_file}")
    except ImportError:
        # NOTE(review): python-docx is imported at module level, above this
        # block, so a missing installation raises before this `try` is
        # entered and this handler does not see it. It only catches an
        # ImportError raised during the conversion itself.
        print("错误: 需要安装 python-docx 库")
        print("请运行: pip install python-docx")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any conversion failure and exit non-zero.
        print(f"转换过程中出现错误: {e}")
        sys.exit(1)
|
||
|
||
|
||
|