"""Convert a Markdown article with embedded base64 SVG diagrams to ODT.

Fenced ```diagram blocks in the article hold base64-encoded SVG data. Each
block is decoded and written to its own .svg file, the block is replaced by a
Markdown image reference to that file, and the rewritten Markdown is converted
to ODT with Pandoc.
"""
import base64
import os
import re
import subprocess

import yaml

# Matches a fenced ```diagram ... ``` block; group 1 is the base64 payload.
# DOTALL lets the payload span multiple lines.
_DIAGRAM_BLOCK_RE = re.compile(r'```diagram(.*?)```', flags=re.DOTALL)


# Load config.yaml for document properties
def load_config(config_path):
    """Parse the YAML file at *config_path* and return its content.

    Uses ``yaml.safe_load``, so only plain YAML data is constructed.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def _extract_diagrams(md_content, diagrams_dir):
    """Write each ```diagram block to an SVG file and return rewritten Markdown.

    Every fenced ```diagram block in *md_content* is base64-decoded and saved
    as ``diagram_<n>.svg`` (numbered from 1, in order of appearance) inside
    *diagrams_dir*. The block is replaced by a Markdown image reference that
    points at the written file.

    Raises:
        binascii.Error: if a block's payload is not valid base64.
        OSError: if an SVG file cannot be written.
    """
    counter = 0

    def replace_diagram_block(match):
        nonlocal counter
        counter += 1
        svg_content = base64.b64decode(match.group(1).strip())
        svg_path = os.path.join(diagrams_dir, f"diagram_{counter}.svg")
        with open(svg_path, 'wb') as sf:
            sf.write(svg_content)
        print(f"Extracted SVG diagram to {svg_path}")
        # Reference the file that was actually written, so the link can never
        # drift from the output directory.
        return f'![Diagram {counter}]({svg_path})'

    return _DIAGRAM_BLOCK_RE.sub(replace_diagram_block, md_content)


if __name__ == "__main__":
    # NOTE(review): the loaded config is not used anywhere below; it is kept
    # so a missing or malformed config.yaml still fails fast. Presumably its
    # document properties were meant to be passed to Pandoc - TODO confirm.
    config = load_config('/app/data/config.yaml')

    # Step 1: Extract ```diagram blocks (base64-encoded SVG) and save as .svg files
    input_md = "/app/data/article.md"
    diagrams_dir = "/app/data/diagrams"
    os.makedirs(diagrams_dir, exist_ok=True)

    with open(input_md, 'r', encoding='utf-8') as f:
        md_content = f.read()

    # Replace diagram blocks with image references
    new_md_content = _extract_diagrams(md_content, diagrams_dir)

    # Save the modified markdown with image references
    temp_md = "/app/data/article_with_svgs.md"
    with open(temp_md, 'w', encoding='utf-8') as f:
        f.write(new_md_content)

    # Step 2: Convert Markdown to ODT using Pandoc.
    # Convert the rewritten file (temp_md), not the original article: the
    # original still contains the raw ```diagram blocks, and converting it to
    # the same output path would overwrite the ODT that includes the images.
    output_odt = "/app/data/output.odt"
    subprocess.run([
        "pandoc",
        temp_md,
        "-o", output_odt
    ], check=True)

    print(f"Converted {input_md} to {output_odt}")