# 安装LibreOffice
# Ubuntu/Debian
sudo apt-get install libreoffice
# macOS
brew install libreoffice
# Windows: 从官网下载安装
import subprocess
import os
import tempfile
from pathlib import Path
def convert_odt_to_pdf_libreoffice(input_path, output_path=None):
    """Convert an ODT document to PDF by running LibreOffice headlessly.

    Args:
        input_path: Path of the ODT file to convert.
        output_path: Desired path of the resulting PDF. When omitted, the
            input path with its suffix replaced by ``.pdf`` is used.

    Returns:
        ``output_path`` (a ``Path`` when it was defaulted, otherwise the
        value passed in) on success, or ``None`` when the conversion fails.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"输入文件不存在: {input_path}")

    if output_path is None:
        output_path = Path(input_path).with_suffix('.pdf')

    # A bare file name such as "output.pdf" has an empty dirname; fall back
    # to the current directory so makedirs() and --outdir get a usable path.
    out_dir = os.path.dirname(output_path) or os.curdir
    os.makedirs(out_dir, exist_ok=True)

    cmd = [
        'libreoffice',
        '--headless',
        '--convert-to', 'pdf',
        '--outdir', out_dir,
        os.fspath(input_path),
    ]
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"转换失败: {e.stderr}")
        return None
    except OSError as e:
        # The executable is missing or cannot be started; report it the
        # same way as any other failed conversion.
        print(f"转换失败: {e}")
        return None

    # LibreOffice names its output after the input file's stem, so the file
    # may have to be moved to the name the caller actually asked for.
    produced = os.path.join(out_dir, Path(input_path).stem + '.pdf')
    if not os.path.exists(produced):
        print(f"转换失败: 未生成输出文件 {produced}")
        return None
    if os.path.abspath(produced) != os.path.abspath(output_path):
        os.replace(produced, output_path)

    print(f"转换成功: {input_path} -> {output_path}")
    return output_path
# Usage example
if __name__ == "__main__":
    import glob

    # Convert one document to an explicitly named PDF.
    convert_odt_to_pdf_libreoffice("document.odt", "output.pdf")

    # Batch-convert every ODT file found in the current directory.
    for odt_name in glob.glob("*.odt"):
        convert_odt_to_pdf_libreoffice(odt_name)
pip install odf2pdf
from odf2pdf import convert_odt_to_pdf
import os
def convert_with_odf2pdf(input_path, output_path=None):
    """Convert an ODT file to PDF through ``convert_odt_to_pdf``.

    Args:
        input_path: Path of the ODT file to convert.
        output_path: Destination PDF path. When omitted, the input path
            with its extension replaced by ``.pdf`` is used.

    Returns:
        The destination path on success, or ``None`` if the conversion
        raised any exception (the error is printed, not propagated).
    """
    target = output_path
    if target is None:
        stem, _extension = os.path.splitext(input_path)
        target = stem + '.pdf'

    try:
        convert_odt_to_pdf(input_path, target)
        print(f"转换完成: {target}")
    except Exception as err:
        print(f"转换失败: {err}")
        return None
    return target
# Usage example: convert document.odt, writing the PDF next to it.
convert_with_odf2pdf("document.odt")
pip install py3o.template
from py3o.template import Template
import os
def convert_with_py3o(template_path, data_dict, output_pdf_path):
    """Fill an ODT template with data and save the result as a PDF.

    The template is first rendered to a temporary ODT file and that file is
    then converted to PDF with a headless LibreOffice run.
    NOTE(review): this assumes py3o.template only produces ODF output (so
    rendering straight to ``output_pdf_path`` would yield an ODT file with a
    ``.pdf`` name) -- confirm against the py3o.template documentation.

    Args:
        template_path: Path of the py3o ODT template.
        data_dict: Mapping of template variable names to values.
        output_pdf_path: Path where the final PDF is written.

    Returns:
        ``output_pdf_path``.

    Raises:
        subprocess.CalledProcessError: If LibreOffice exits with an error.
        RuntimeError: If LibreOffice exits cleanly without producing a PDF.
    """
    import shutil
    import subprocess
    import tempfile

    with tempfile.TemporaryDirectory() as work_dir:
        # Render the filled-in template as an intermediate ODT document.
        rendered_odt = os.path.join(work_dir, "rendered.odt")
        template = Template(template_path, rendered_odt)
        template.render(data_dict)

        # Convert the intermediate document; LibreOffice writes
        # <stem>.pdf into the directory given by --outdir.
        subprocess.run(
            [
                'libreoffice',
                '--headless',
                '--convert-to', 'pdf',
                '--outdir', work_dir,
                rendered_odt,
            ],
            capture_output=True,
            text=True,
            check=True,
        )
        rendered_pdf = os.path.join(work_dir, "rendered.pdf")
        if not os.path.exists(rendered_pdf):
            raise RuntimeError(f"LibreOffice did not produce {rendered_pdf}")

        # abspath() keeps a bare file name from yielding an empty dirname.
        target_dir = os.path.dirname(os.path.abspath(output_pdf_path))
        os.makedirs(target_dir, exist_ok=True)
        shutil.move(rendered_pdf, output_pdf_path)

    print(f"PDF生成完成: {output_pdf_path}")
    return output_pdf_path
# Usage example: the data passed to the template, followed by the call that
# renders template.odt with it.
data = {
    'name': '张三',
    'date': '2024-01-15',
    'items': ['项目1', '项目2', '项目3']
}
convert_with_py3o("template.odt", data, "output.pdf")
pip install odfpy reportlab
from odf.opendocument import load
from odf.text import P
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
def convert_odt_to_pdf_custom(input_path, output_path):
    """Convert the plain text of an ODT document into a simple PDF.

    Only paragraph text is carried over; formatting, images and tables are
    not reproduced. Every non-empty paragraph becomes one ReportLab
    paragraph in the sample "Normal" style, followed by a 12-point spacer.
    NOTE(review): the "Normal" sample style presumably uses a Latin-only
    font, so CJK text may need a different font -- confirm before relying
    on this for non-Latin documents.

    Args:
        input_path: Path of the ODT file to read.
        output_path: Path of the PDF file to write.

    Returns:
        ``output_path``.
    """
    from xml.sax.saxutils import escape

    from odf import teletype

    doc = load(input_path)

    pdf = SimpleDocTemplate(
        output_path,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72,
    )
    styles = getSampleStyleSheet()

    story = []
    for elem in doc.getElementsByType(P):
        # extractText() walks the whole subtree, so text nested inside
        # spans or links is kept instead of only direct text children.
        text = teletype.extractText(elem)
        if text.strip():
            # Paragraph parses its input as markup; escape &, < and > so
            # literal document text cannot break or alter the output.
            story.append(Paragraph(escape(text), styles['Normal']))
            story.append(Spacer(1, 12))

    pdf.build(story)
    print(f"转换完成: {output_path}")
    return output_path
# Usage example: convert document.odt into output.pdf.
convert_odt_to_pdf_custom("document.odt", "output.pdf")
import os
import subprocess
from pathlib import Path
from abc import ABC, abstractmethod
class ODTtoPDFConverter(ABC):
    """Abstract base class for ODT-to-PDF converters."""

    @abstractmethod
    def convert(self, input_path, output_path=None):
        """Convert ``input_path`` to PDF; subclasses provide the behaviour.

        Args:
            input_path: Path of the ODT file to convert.
            output_path: Optional destination path for the PDF.
        """
        pass

    def validate_input(self, input_path):
        """Check that ``input_path`` exists and has an ``.odt`` extension.

        Args:
            input_path: Path of the candidate input file, as a string or
                any ``os.PathLike`` object such as ``pathlib.Path``.

        Returns:
            ``True`` when the file passes both checks.

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the name does not end in ``.odt`` (the check is
                case-insensitive).
        """
        # Normalise to the plain path first so path objects, which have no
        # .lower() method, are accepted as well as strings.
        path_text = os.fspath(input_path)
        if not os.path.exists(path_text):
            raise FileNotFoundError(f"文件不存在: {input_path}")
        if not path_text.lower().endswith('.odt'):
            raise ValueError("文件必须是ODT格式")
        return True
class LibreOfficeConverter(ODTtoPDFConverter):
    """Converter that shells out to a headless LibreOffice process."""

    def __init__(self, libreoffice_path=None):
        """Store the LibreOffice executable to run.

        Args:
            libreoffice_path: Path or command name of the LibreOffice
                binary. Defaults to ``'libreoffice'``.
        """
        self.libreoffice_path = libreoffice_path or 'libreoffice'

    def convert(self, input_path, output_path=None):
        """Convert one ODT file to PDF.

        Args:
            input_path: Path of the ODT file to convert.
            output_path: Destination PDF path. When omitted, the input path
                with its suffix replaced by ``.pdf`` is used.

        Returns:
            The output path as a string on success, or ``None`` on any
            failure (the error is printed rather than raised).
        """
        try:
            # Work on the plain path so path objects are handled as well.
            input_path = os.fspath(input_path)
            self.validate_input(input_path)

            if output_path is None:
                output_path = Path(input_path).with_suffix('.pdf')

            # Path.parent is "." for a bare file name, whereas
            # os.path.dirname() would give "" and break makedirs/--outdir.
            target = Path(output_path)
            out_dir = target.parent
            out_dir.mkdir(parents=True, exist_ok=True)

            cmd = [
                self.libreoffice_path,
                '--headless',
                '--convert-to', 'pdf:writer_pdf_Export',
                '--outdir', str(out_dir),
                input_path,
            ]
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=60,
            )
            if result.returncode != 0:
                print(f"❌ 转换失败: {result.stderr}")
                return None

            # LibreOffice names the output after the input stem; confirm it
            # exists and move it when another name was requested.
            produced = out_dir / (Path(input_path).stem + '.pdf')
            if not produced.exists():
                print(f"❌ 转换失败: 未生成输出文件 {produced}")
                return None
            if produced != target:
                os.replace(produced, target)

            print(f"✅ 转换成功: {output_path}")
            return str(output_path)
        except Exception as e:
            # Deliberate best-effort: report the problem and signal the
            # failure through the None return value.
            print(f"❌ 转换出错: {e}")
            return None

    def batch_convert(self, input_dir, output_dir=None, pattern="*.odt"):
        """Convert every file in ``input_dir`` that matches ``pattern``.

        Args:
            input_dir: Directory searched for input files.
            output_dir: Directory receiving the PDFs. Defaults to
                ``input_dir``.
            pattern: Glob pattern selecting the input files.

        Returns:
            A list of ``(input_file, result)`` tuples, where ``result`` is
            whatever ``convert`` returned for that file.
        """
        import glob

        if output_dir is None:
            output_dir = input_dir
        os.makedirs(output_dir, exist_ok=True)

        results = []
        for odt_file in glob.glob(os.path.join(input_dir, pattern)):
            output_file = os.path.join(output_dir, Path(odt_file).stem + '.pdf')
            results.append((odt_file, self.convert(odt_file, output_file)))
        return results
# Usage example
if __name__ == "__main__":
    # Create a converter instance
    converter = LibreOfficeConverter()
    # Convert a single file
    converter.convert("example.odt")
    # Batch conversion
    # converter.batch_convert("./documents", "./pdfs")
    # Use a custom LibreOffice path (Windows example)
    # win_converter = LibreOfficeConverter(r"C:\Program Files\LibreOffice\program\soffice.exe")
from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
import io
def add_watermark_to_pdf(input_pdf, output_pdf, watermark_text="机密"):
    """Stamp a translucent, rotated text watermark on every page of a PDF.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the watermarked PDF to write.
        watermark_text: Text drawn on each page.
    """
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont

    reader = PdfReader(input_pdf)
    writer = PdfWriter()

    # Helvetica has no CJK glyphs, so text outside Latin-1 (including the
    # default watermark) is drawn with ReportLab's built-in Chinese CID font.
    try:
        watermark_text.encode("latin-1")
        font_name = "Helvetica"
    except UnicodeEncodeError:
        font_name = "STSong-Light"
        pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    # Draw the watermark once on an in-memory, letter-sized page.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont(font_name, 60)
    can.setFillAlpha(0.3)  # 30% opacity
    can.rotate(45)
    # Coordinates are in the rotated coordinate system.
    can.drawString(200, 100, watermark_text)
    can.save()

    # Rewind so the buffer can be read back as a PDF.
    packet.seek(0)
    watermark_page = PdfReader(packet).pages[0]

    # Overlay the same watermark page on every page of the input.
    for page in reader.pages:
        page.merge_page(watermark_page)
        writer.add_page(page)

    with open(output_pdf, "wb") as output_file:
        writer.write(output_file)
    print(f"水印添加完成: {output_pdf}")
# Combine conversion and watermarking
def convert_with_watermark(odt_path, pdf_path, watermark_text=None):
    """Convert an ODT file to PDF and optionally watermark the result.

    Args:
        odt_path: Path of the ODT file to convert.
        pdf_path: Path of the final PDF.
        watermark_text: Text to stamp on every page. When empty or
            ``None``, the converted PDF is stored without a watermark.

    Returns:
        ``pdf_path``.

    Raises:
        FileNotFoundError: If the conversion did not produce a PDF.
    """
    import shutil
    import tempfile

    # A private temporary directory avoids clashes on a fixed file name in
    # the working directory and is removed even when a later step fails.
    with tempfile.TemporaryDirectory() as work_dir:
        # Keep the input's stem: that is the name LibreOffice gives its
        # output inside the target directory.
        stem = os.path.splitext(os.path.basename(odt_path))[0]
        temp_pdf = os.path.join(work_dir, stem + '.pdf')

        converter = LibreOfficeConverter()
        converted = converter.convert(odt_path, temp_pdf)
        if converted is None or not os.path.exists(temp_pdf):
            raise FileNotFoundError(f"ODT to PDF conversion failed: {odt_path}")

        if watermark_text:
            add_watermark_to_pdf(temp_pdf, pdf_path, watermark_text)
        else:
            # shutil.move also works when the temporary directory and the
            # destination are on different filesystems.
            shutil.move(temp_pdf, pdf_path)
    return pdf_path
# Dockerfile
FROM python:3.9-slim
# Install LibreOffice and its Writer component, then remove the apt package
# lists.
RUN apt-get update && apt-get install -y \
libreoffice \
libreoffice-writer \
&& rm -rf /var/lib/apt/lists/*
# Install the Python dependencies
RUN pip install odf2pdf PyPDF2
# Copy the conversion script into the image
COPY convert_odt_to_pdf.py /app/
WORKDIR /app
# Run the conversion script as the container entry point
ENTRYPOINT ["python", "convert_odt_to_pdf.py"]
这个完整的指南涵盖了从简单到高级的各种ODT转PDF方法,您可以根据具体需求选择合适的方法。