Merge internal PDF Table of Contents

Published July 21, 2025 · 1 min read

When working with large PDF documents, a detailed and accurate Table of Contents (ToC) is crucial for navigation and usability. However, many PDFs lack a proper ToC, or have one that is not well integrated into the document structure. In my case, I had several PDFs that each came with their own internal ToC, and I wanted to combine them into one document with a single, cohesive ToC covering everything.

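To address this issue, I developed a Python script that merges several PDF files into one and carries each file's internal ToC over into the combined document. The first file's entries stay at the top level; the entries of every following file are nested under a top-level entry derived from that file's name. The tool is run from the command line (output file first, then the input files in the order they should appear), making it accessible and easy to use for anyone who needs to enhance their PDF documents.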

#!/usr/bin/env python

from pypdf import PdfReader, PdfWriter, PageObject, Transformation, PaperSize
from pypdf.generic import Destination
import sys
import os

def add_bookmarks(writer, reader, bookmarks, parent, page_offset):
    """Copy a nested outline from ``reader`` into ``writer`` below ``parent``.

    ``bookmarks`` is expected to have the shape of ``reader.outline``: a list
    of ``Destination`` entries in which a nested list holds the children of
    the ``Destination`` directly before it.

    Args:
        writer: Target writer; must provide
            ``add_outline_item(title, page_number=..., parent=...)``.
        reader: Source reader; must provide
            ``get_destination_page_number(destination)``.
        bookmarks: The (possibly nested) outline list to copy.
        parent: Outline item the copied entries are attached to, or ``None``
            to add them at the top level.
        page_offset: Value added to every resolved source page index so the
            entry points at the right page of the merged document.
    """
    # Outline item created for the immediately preceding Destination, if any.
    # A nested list is attached to it; anything else resets it.
    owner = None

    for item in bookmarks:
        if isinstance(item, Destination):
            # The lookup yields None when the destination does not resolve to
            # a page of the source document (e.g. a null or dangling target).
            source_page = reader.get_destination_page_number(item)
            if source_page is None:
                # Keep the entry without a target page instead of crashing on
                # ``None + page_offset``; its children still get a parent.
                target_page = None
            else:
                target_page = source_page + page_offset

            owner = writer.add_outline_item(
                item.title, page_number=target_page, parent=parent
            )
            continue

        if isinstance(item, list):
            # A list right after a Destination holds that entry's children.
            # A list in any other position is unexpected; its entries are
            # attached to the current parent so they are not dropped.
            add_bookmarks(
                writer,
                reader,
                item,
                parent if owner is None else owner,
                page_offset,
            )

        owner = None

def merge_pdfs_with_custom_outline(output_path, input_paths):
    """Merge several PDFs into one file and combine their outlines.

    Pages are appended in the order of ``input_paths``. The outline entries
    of the input processed while the output is still empty (normally the
    first file) are copied to the top level, and that input's pages are
    scaled to a fixed size. For every later input, a root bookmark labelled
    ``"Band <name>"`` is created, pointing at that input's first page in the
    merged document, and the input's own outline is nested beneath it.
    ``<name>`` is the file's base name up to its first dot.

    Args:
        output_path: Path the merged PDF is written to.
        input_paths: Paths of the PDFs to merge, in output order.
    """
    # Target size (width, height) applied to the pages of the first input.
    first_input_page_size = (481, 681)

    writer = PdfWriter()
    page_offset = 0  # Number of pages already appended to the writer.

    for input_path in input_paths:
        reader = PdfReader(input_path)
        num_pages = len(reader.pages)
        is_first_input = page_offset == 0

        # Everything before the first dot of the file name; the extension
        # (and any further dotted suffixes) is dropped.
        custom_label = "Band " + os.path.basename(input_path).split(".")[0]
        print(custom_label)

        if is_first_input:
            for page in reader.pages:
                page.scale_to(*first_input_page_size)

        # The outline is rebuilt by hand below, so it is not imported here.
        writer.append(reader, import_outline=False)

        # Create the root bookmark only after the pages are appended: its
        # target (index ``page_offset``) is this input's first page, which
        # does not exist in the writer before the append.
        if is_first_input:
            root = None
        else:
            root = writer.add_outline_item(custom_label, page_offset)

        # Copy the original outline, nested under the root when there is one.
        add_bookmarks(writer, reader, reader.outline, root, page_offset)

        page_offset += num_pages

    with open(output_path, "wb") as f:
        writer.write(f)

if __name__ == "__main__":
    # Command line: <output.pdf> followed by at least one input PDF.
    cli_args = sys.argv[1:]

    if len(cli_args) < 2:
        print("Usage: python merge_with_custom_outline.py output.pdf input1.pdf input2.pdf ...")
        sys.exit(1)

    # First argument is the destination; everything after it is merged in order.
    output_pdf, *input_pdfs = cli_args

    merge_pdfs_with_custom_outline(output_pdf, input_pdfs)

Since I also use NixOS, I wrote a small flake to go with the script. As written, it provides a development environment with Python and pypdf; with some modifications, it could also be used to install the script as a program.

{
  description = "Dev shell with Python and pypdf for PDF TOC merging";

  # Flake inputs: the package set and the per-system helper library.
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    # Produce the same outputs for every default system.
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        # Interpreter used by the shell; its package set supplies pypdf.
        python = pkgs.python312;
      in {
        # Entered with `nix develop`.
        devShells.default = pkgs.mkShell {
          name = "pypdf-shell";
          buildInputs = [
            python
            python.pkgs.pypdf
          ];
        };
      });
}