344 lines
12 KiB
Python
344 lines
12 KiB
Python
"""
|
||
Convert the three MCP documentation Markdown files to Word (.docx) format.
|
||
|
||
Handles:
|
||
- Heading levels 1–4
|
||
- Bold (**text**) and inline code (`text`)
|
||
- Fenced code blocks (``` ... ```)
|
||
- Tables (| col | col |)
|
||
- Unordered lists (- item, * item)
|
||
- Ordered lists (1. item)
|
||
- Horizontal rules (---)
|
||
- Blank lines → paragraph spacing
|
||
|
||
Run:
|
||
python docs/md_to_docx.py
|
||
Produces:
|
||
docs/HLD.docx
|
||
docs/LLD.docx
|
||
docs/MANUAL.docx
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from pathlib import Path
|
||
|
||
from docx import Document
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.oxml.ns import qn
|
||
from docx.oxml import OxmlElement
|
||
from docx.shared import Inches, Pt, RGBColor
|
||
|
||
|
||
# ── Colour palette ────────────────────────────────────────────────────────────
|
||
# Each constant is an RGBColor built from its red, green and blue byte values.
DARK_BLUE = RGBColor(0x1F, 0x49, 0x7D)  # level-1 heading text
MID_BLUE = RGBColor(0x2E, 0x74, 0xB5)  # level-2 heading text
STEEL_BLUE = RGBColor(0x1F, 0x78, 0xB4)  # level-3 heading text
DARK_GREY = RGBColor(0x40, 0x40, 0x40)  # body text; also level-4 heading text
CODE_BG = RGBColor(0xF2, 0xF2, 0xF2)  # code block shading (light grey)
TABLE_HEAD = RGBColor(0x1F, 0x49, 0x7D)  # table header row background
TABLE_EVEN = RGBColor(0xEA, 0xF2, 0xFF)  # background of alternating table body rows
|
||
|
||
|
||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||
|
||
def _shade_cell(cell, colour: RGBColor) -> None:
    """Give a table cell a solid background fill.

    A ``w:shd`` element is appended to the cell's properties; its fill is
    the colour's three components written as six upper-case hex digits.
    """
    cell_props = cell._tc.get_or_add_tcPr()
    shading = OxmlElement("w:shd")
    attributes = (
        ("w:val", "clear"),
        ("w:color", "auto"),
        ("w:fill", f"{colour[0]:02X}{colour[1]:02X}{colour[2]:02X}"),
    )
    for attr_name, attr_value in attributes:
        shading.set(qn(attr_name), attr_value)
    cell_props.append(shading)
|
||
|
||
|
||
def _set_cell_border(cell, **kwargs) -> None:
    """Attach border definitions to a table cell.

    Each keyword names a side (``top``, ``left``, ``bottom``, ``right``,
    ``insideH`` or ``insideV``) and maps to a dict of attribute name →
    value that is copied onto the matching ``w:<side>`` element. Keywords
    naming any other side are ignored.
    """
    cell_props = cell._tc.get_or_add_tcPr()
    borders = OxmlElement("w:tcBorders")
    known_sides = ("top", "left", "bottom", "right", "insideH", "insideV")
    for side_name in (name for name in known_sides if name in kwargs):
        edge = OxmlElement(f"w:{side_name}")
        for attr_name, attr_value in kwargs[side_name].items():
            edge.set(qn(f"w:{attr_name}"), attr_value)
        borders.append(edge)
    cell_props.append(borders)
|
||
|
||
|
||
def _apply_inline(run, text: str) -> None:
    """Set the run's text to *text* exactly as given.

    No Markdown markers are detected or stripped here: bold and
    inline-code markers in *text* are written to the run unchanged.
    """
    run.text = text
|
||
|
||
|
||
def _parse_inline(para, text: str) -> None:
|
||
"""
|
||
Parse a line of text for inline Markdown:
|
||
**bold** → bold run
|
||
`code` → monospace run
|
||
plain → normal run
|
||
Adds runs to the given paragraph.
|
||
"""
|
||
pattern = re.compile(r'(\*\*[^*]+\*\*|`[^`]+`)')
|
||
parts = pattern.split(text)
|
||
for part in parts:
|
||
if not part:
|
||
continue
|
||
if part.startswith("**") and part.endswith("**"):
|
||
run = para.add_run(part[2:-2])
|
||
run.bold = True
|
||
elif part.startswith("`") and part.endswith("`"):
|
||
run = para.add_run(part[1:-1])
|
||
run.font.name = "Courier New"
|
||
run.font.size = Pt(9)
|
||
run.font.color.rgb = RGBColor(0xC0, 0x39, 0x2B)
|
||
else:
|
||
run = para.add_run(part)
|
||
|
||
|
||
def _add_heading(doc: Document, text: str, level: int) -> None:
    """Append a coloured, bold heading to the document.

    Leading ``#`` characters and any ``{#anchor}`` suffix are removed from
    *text* first. Levels 1–3 each have their own colour and size; every
    other level uses dark grey at 11 pt.
    """
    title = re.sub(r"^#+\s*", "", text).strip()
    # Remove anchor links like {#section-name}
    title = re.sub(r"\s*\{#[^}]+\}", "", title)

    heading = doc.add_heading(title, level=level)
    first_run = heading.runs[0] if heading.runs else heading.add_run(title)

    # level → (colour, point size); anything not listed falls back below.
    per_level = {
        1: (DARK_BLUE, 20),
        2: (MID_BLUE, 15),
        3: (STEEL_BLUE, 12),
    }
    colour, points = per_level.get(level, (DARK_GREY, 11))
    first_run.font.color.rgb = colour
    first_run.font.size = Pt(points)
    first_run.bold = True
|
||
|
||
|
||
def _add_code_block(doc: Document, lines: list[str]) -> None:
    """Append one shaded, monospace paragraph containing *lines*.

    Args:
        doc: Document the paragraph is appended to.
        lines: Code lines without the surrounding fence; they are joined
            with newlines into a single run.

    The background fill comes from the ``CODE_BG`` palette constant rather
    than a separate hard-coded hex string, so the palette is the single
    place where the colour is defined.
    """
    para = doc.add_paragraph()
    para.paragraph_format.left_indent = Inches(0.3)
    para.paragraph_format.space_before = Pt(4)
    para.paragraph_format.space_after = Pt(4)

    # Paragraph shading is applied by appending a w:shd element directly.
    pPr = para._p.get_or_add_pPr()
    shd = OxmlElement("w:shd")
    shd.set(qn("w:val"), "clear")
    shd.set(qn("w:color"), "auto")
    shd.set(
        qn("w:fill"),
        f"{CODE_BG[0]:02X}{CODE_BG[1]:02X}{CODE_BG[2]:02X}",
    )
    pPr.append(shd)

    run = para.add_run("\n".join(lines))
    run.font.name = "Courier New"
    run.font.size = Pt(8.5)
    run.font.color.rgb = RGBColor(0x1A, 0x1A, 0x1A)
|
||
|
||
|
||
# A cell whose entire content is exactly one `code` span.
_SINGLE_CODE_SPAN_RE = re.compile(r"`[^`]+`")


def _clean_cell_text(cell_text: str) -> str:
    """Trim a raw cell; unwrap it only when the whole cell is one code span."""
    stripped = cell_text.strip()
    if _SINGLE_CODE_SPAN_RE.fullmatch(stripped):
        return stripped[1:-1]
    return stripped


def _add_table(doc: Document, rows: list[list[str]]) -> None:
    """Append a formatted table; the first row is treated as the header.

    Args:
        doc: Document the table is appended to.
        rows: Cell texts, one list per row. Short rows are padded with
            empty cells to the width of the longest row. Nothing is added
            when *rows* is empty.

    Header cells are shaded with ``TABLE_HEAD`` and rendered bold white;
    body rows with an even index are shaded with ``TABLE_EVEN``. All cell
    text is 9 pt and goes through ``_parse_inline``, so inline markers in
    header cells are consumed as well.

    Backticks are removed only when a cell consists of a single code span.
    Stripping them unconditionally turned "`a` and `b`" into "a` and `b",
    which then rendered the text between the two spans as code.
    """
    if not rows:
        return
    col_count = max(len(r) for r in rows)
    # Normalise row lengths
    rows = [r + [""] * (col_count - len(r)) for r in rows]

    table = doc.add_table(rows=len(rows), cols=col_count)
    table.style = "Table Grid"

    for row_idx, (row, row_data) in enumerate(zip(table.rows, rows)):
        is_header = row_idx == 0
        # Fetch the row's cells once per row rather than once per cell.
        for cell, cell_text in zip(row.cells, row_data):
            para = cell.paragraphs[0]
            para.paragraph_format.space_before = Pt(2)
            para.paragraph_format.space_after = Pt(2)

            if is_header:
                _shade_cell(cell, TABLE_HEAD)
            elif row_idx % 2 == 0:
                _shade_cell(cell, TABLE_EVEN)

            _parse_inline(para, _clean_cell_text(cell_text))
            for run in para.runs:
                run.font.size = Pt(9)
                if is_header:
                    run.bold = True
                    run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)

    doc.add_paragraph()  # spacing after table
|
||
|
||
|
||
def _add_list_item(doc: Document, text: str, level: int, ordered: bool,
                   counter: int) -> None:
    """Append one list entry as a "List Number" or "List Bullet" paragraph.

    Nested entries (``level`` above zero) get a left indent of a quarter
    inch per level plus one. The text is parsed for inline Markdown and
    every resulting run is set to 10 pt. ``counter`` is accepted but not
    used.
    """
    style_name = "List Number" if ordered else "List Bullet"
    item = doc.add_paragraph(style=style_name)

    is_nested = level > 0
    if is_nested:
        item.paragraph_format.left_indent = Inches(0.25 * (level + 1))

    _parse_inline(item, text)
    for piece in item.runs:
        piece.font.size = Pt(10)
|
||
|
||
|
||
def _parse_md_table(raw_rows: list[str]) -> list[list[str]]:
|
||
"""Convert raw Markdown table lines to a list of cell lists."""
|
||
result = []
|
||
for line in raw_rows:
|
||
# Skip separator rows (---|---)
|
||
if re.match(r"^\s*\|?[\s\-:]+\|[\s\-:|]+\s*$", line):
|
||
continue
|
||
cells = [c.strip() for c in line.strip().strip("|").split("|")]
|
||
if cells:
|
||
result.append(cells)
|
||
return result
|
||
|
||
|
||
# ── Main converter ────────────────────────────────────────────────────────────
|
||
|
||
def _add_horizontal_rule(doc: Document) -> None:
    """Append an empty paragraph with a single blue bottom border."""
    para = doc.add_paragraph()
    pPr = para._p.get_or_add_pPr()
    pBdr = OxmlElement("w:pBdr")
    bottom = OxmlElement("w:bottom")
    bottom.set(qn("w:val"), "single")
    bottom.set(qn("w:sz"), "6")
    bottom.set(qn("w:space"), "1")
    bottom.set(qn("w:color"), "2E74B5")
    pBdr.append(bottom)
    pPr.append(pBdr)


def _flush_table(doc: Document, table_rows: list[str]) -> None:
    """Render the buffered table lines, if any, and empty the buffer."""
    if not table_rows:
        return
    parsed = _parse_md_table(table_rows)
    if parsed:
        _add_table(doc, parsed)
    table_rows.clear()


def convert(md_path: Path, docx_path: Path) -> None:
    """Convert one Markdown file to a .docx file.

    Args:
        md_path: Markdown source, read as UTF-8.
        docx_path: Destination path for the Word document.

    The source is processed line by line. Each line is classified, in this
    order, as: code fence, code-block content, table row, heading (levels
    1–4), horizontal rule, unordered list item, ordered list item, blank
    line, or plain paragraph. After saving, the output path and its size
    in KB are printed.

    Buffered table rows are flushed before a code fence opens, so a table
    directly followed by a code block keeps its position in the document.
    A code block still open at end of file is emitted instead of dropped.
    """
    doc = Document()

    # Page margins
    for section in doc.sections:
        section.top_margin = Inches(1.0)
        section.bottom_margin = Inches(1.0)
        section.left_margin = Inches(1.2)
        section.right_margin = Inches(1.2)

    # Default body style
    style = doc.styles["Normal"]
    style.font.name = "Calibri"
    style.font.size = Pt(10.5)
    style.font.color.rgb = DARK_GREY

    in_code_block = False
    code_lines: list[str] = []
    table_rows: list[str] = []

    for line in md_path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()

        # ── Fenced code block ──────────────────────────────────────────────
        if stripped.startswith("```"):
            if in_code_block:
                in_code_block = False
                _add_code_block(doc, code_lines)
            else:
                # Emit any pending table first so document order is kept.
                _flush_table(doc, table_rows)
                in_code_block = True
                code_lines = []
            continue

        if in_code_block:
            code_lines.append(line)
            continue

        # ── Table detection ────────────────────────────────────────────────
        if stripped.startswith("|"):
            table_rows.append(line)
            continue
        _flush_table(doc, table_rows)

        # ── Headings ───────────────────────────────────────────────────────
        m = re.match(r"^(#{1,4})\s+(.+)$", line)
        if m:
            _add_heading(doc, m.group(2), len(m.group(1)))
            continue

        # ── Horizontal rule ────────────────────────────────────────────────
        if re.match(r"^[-*_]{3,}\s*$", stripped):
            _add_horizontal_rule(doc)
            continue

        # ── Unordered list ─────────────────────────────────────────────────
        m = re.match(r"^(\s*)[-*]\s+(.+)$", line)
        if m:
            # Two leading spaces per nesting level.
            indent = len(m.group(1)) // 2
            _add_list_item(doc, m.group(2), indent, ordered=False, counter=0)
            continue

        # ── Ordered list ───────────────────────────────────────────────────
        m = re.match(r"^(\s*)\d+\.\s+(.+)$", line)
        if m:
            indent = len(m.group(1)) // 2
            _add_list_item(doc, m.group(2), indent, ordered=True, counter=0)
            continue

        # ── Blank line ─────────────────────────────────────────────────────
        if not stripped:
            continue

        # ── Plain paragraph ────────────────────────────────────────────────
        para = doc.add_paragraph()
        para.paragraph_format.space_after = Pt(4)
        _parse_inline(para, line)
        for run in para.runs:
            run.font.size = Pt(10.5)

    # Flush whatever is still buffered at end of file.
    if in_code_block:
        _add_code_block(doc, code_lines)
    _flush_table(doc, table_rows)

    doc.save(str(docx_path))
    print(f" Written: {docx_path} ({docx_path.stat().st_size // 1024} KB)")
|
||
|
||
|
||
# ── Entry point ───────────────────────────────────────────────────────────────
|
||
|
||
if __name__ == "__main__":
|
||
docs_dir = Path(__file__).parent
|
||
|
||
files = [
|
||
("HLD.md", "HLD.docx"),
|
||
("LLD.md", "LLD.docx"),
|
||
("MANUAL.md", "MANUAL.docx"),
|
||
]
|
||
|
||
print("Converting Markdown → Word (.docx) ...")
|
||
for md_name, docx_name in files:
|
||
md_path = docs_dir / md_name
|
||
docx_path = docs_dir / docx_name
|
||
print(f" Processing {md_name} ...")
|
||
convert(md_path, docx_path)
|
||
|
||
print("Done.")
|