python-docx#

python-docx 是用于创建和更新 Microsoft Word (.docx) 文件的 Python 库。

安装很简单:

pip install python-docx

文档快速使用#

新建文档:

from docx import Document

document = Document()

添加一个标题:

document.add_heading("入门")
<docx.text.paragraph.Paragraph at 0x7f8244ecf380>

保存文档(目标目录 build/ 必须已存在;下面的示例因该目录不存在而抛出 FileNotFoundError):

document.save('build/test.docx')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[3], line 1
----> 1 document.save('build/test.docx')

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/document.py:151, in Document.save(self, path_or_stream)
    145 def save(self, path_or_stream: str | IO[bytes]):
    146     """Save this document to `path_or_stream`.
    147 
    148     `path_or_stream` can be either a path to a filesystem location (a string) or a
    149     file-like object.
    150     """
--> 151     self._part.save(path_or_stream)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/parts/document.py:106, in DocumentPart.save(self, path_or_stream)
    103 def save(self, path_or_stream):
    104     """Save this document to `path_or_stream`, which can be either a path to a
    105     filesystem location (a string) or a file-like object."""
--> 106     self.package.save(path_or_stream)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/opc/package.py:151, in OpcPackage.save(self, pkg_file)
    149 for part in self.parts:
    150     part.before_marshal()
--> 151 PackageWriter.write(pkg_file, self.rels, self.parts)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/opc/pkgwriter.py:27, in PackageWriter.write(pkg_file, pkg_rels, parts)
     23 @staticmethod
     24 def write(pkg_file, pkg_rels, parts):
     25     """Write a physical package (.pptx file) to `pkg_file` containing `pkg_rels` and
     26     `parts` and a content types stream based on the content types of the parts."""
---> 27     phys_writer = PhysPkgWriter(pkg_file)
     28     PackageWriter._write_content_types_stream(phys_writer, parts)
     29     PackageWriter._write_pkg_rels(phys_writer, pkg_rels)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/opc/phys_pkg.py:109, in _ZipPkgWriter.__init__(self, pkg_file)
    107 def __init__(self, pkg_file):
    108     super(_ZipPkgWriter, self).__init__()
--> 109     self._zipf = ZipFile(pkg_file, "w", compression=ZIP_DEFLATED)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/zipfile/__init__.py:1331, in ZipFile.__init__(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps, metadata_encoding)
   1329 while True:
   1330     try:
-> 1331         self.fp = io.open(file, filemode)
   1332     except OSError:
   1333         if filemode in modeDict:

FileNotFoundError: [Errno 2] No such file or directory: 'build/test.docx'

打开已有文档(文件必须已存在;由于上一步保存失败,build/test.docx 并不存在,下面的示例因此抛出 PackageNotFoundError):

document = Document('build/test.docx')
document.save('build/new-file-name.docx')
---------------------------------------------------------------------------
PackageNotFoundError                      Traceback (most recent call last)
Cell In[4], line 1
----> 1 document = Document('build/test.docx')
      2 document.save('build/new-file-name.docx')

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/api.py:23, in Document(docx)
     16 """Return a |Document| object loaded from `docx`, where `docx` can be either a path
     17 to a ``.docx`` file (a string) or a file-like object.
     18 
     19 If `docx` is missing or ``None``, the built-in default document "template" is
     20 loaded.
     21 """
     22 docx = _default_docx_path() if docx is None else docx
---> 23 document_part = Package.open(docx).main_document_part
     24 if document_part.content_type != CT.WML_DOCUMENT_MAIN:
     25     tmpl = "file '%s' is not a Word file, content type is '%s'"

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/opc/package.py:116, in OpcPackage.open(cls, pkg_file)
    113 @classmethod
    114 def open(cls, pkg_file):
    115     """Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
--> 116     pkg_reader = PackageReader.from_file(pkg_file)
    117     package = cls()
    118     Unmarshaller.unmarshal(pkg_reader, package, PartFactory)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/opc/pkgreader.py:22, in PackageReader.from_file(pkg_file)
     19 @staticmethod
     20 def from_file(pkg_file):
     21     """Return a |PackageReader| instance loaded with contents of `pkg_file`."""
---> 22     phys_reader = PhysPkgReader(pkg_file)
     23     content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
     24     pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)

File /opt/hostedtoolcache/Python/3.12.3/x64/lib/python3.12/site-packages/docx/opc/phys_pkg.py:21, in PhysPkgReader.__new__(cls, pkg_file)
     19         reader_cls = _ZipPkgReader
     20     else:
---> 21         raise PackageNotFoundError("Package not found at '%s'" % pkg_file)
     22 else:  # assume it's a stream and pass it to Zip reader to sort out
     23     reader_cls = _ZipPkgReader

PackageNotFoundError: Package not found at 'build/test.docx'

文档样式#

样式决定文档的外观。

document = Document()
styles = document.styles
styles
<docx.styles.styles.Styles at 0x7f8244249d30>

文档分区#

Word 支持节的概念,节是文档中具有相同页面布局设置(如页边距和页面方向)的部分。

document = Document()
sections = document.sections
sections
<docx.section.Sections at 0x7f824441c860>
len(sections)
1
section = sections[0]
section
<docx.section.Section at 0x7f82442506e0>

添加节(start_type=2 对应 WD_SECTION.NEW_PAGE,即新节从新的一页开始):

document.add_section(start_type=2)
<docx.section.Section at 0x7f824424bad0>