Skip to content

Commit

Permalink
Merge pull request #24 from kcroker/bugfix/outline
Browse files Browse the repository at this point in the history
Ignore unrecognized page titles in the outline
  • Loading branch information
v-- committed Dec 9, 2023
2 parents 5ee6667 + ab6665c commit 3d4c8c3
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 2 deletions.
8 changes: 7 additions & 1 deletion dpsprep/outline.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from loguru import logger
from pdfrw import PdfName, PdfDict, IndirectPdfDict
import djvu.sexpr

Expand All @@ -10,7 +11,12 @@ class OutlineTransformVisitor(SExpressionVisitor):
def visit_plain_list(self, node: djvu.sexpr.StringExpression, parent: IndirectPdfDict):
title, page, *rest = node
# I have experimentally determined that we need to translate page indices. -- Ianis, 2023-05-03
page_number = int(page.value[1:]) - 1
try:
page_number = int(page.value[1:]) - 1
except ValueError:
# As far as I understand, python-djvulibre doesn't support Djvu's page titles. -- Ianis, 2023-12-09
logger.warning(f'Could not determine page number from the page title {page.value}.')
return

bookmark = IndirectPdfDict(
Parent = parent,
Expand Down
67 changes: 67 additions & 0 deletions dpsprep/test_outline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from djvu import sexpr
from pdfrw import IndirectPdfDict

from .outline import OutlineTransformVisitor


def test_basic_outline():
    """Check that a single flat bookmark yields exactly one outline entry.

    The bookmark targets '#100'; the resulting destination is expected to
    carry 99 as its first element.
    """
    chapter = sexpr.ListExpression([
        sexpr.StringExpression(b'Chapter 2'),
        sexpr.StringExpression(b'#100'),
    ])
    tree = sexpr.ListExpression([
        sexpr.SymbolExpression(sexpr.Symbol('bookmarks')),
        chapter,
    ])

    outline = OutlineTransformVisitor().visit(tree)

    assert outline.Count == 1
    entry = outline.First
    assert entry.Title == 'Chapter 2'
    # The page number: '#100' in the source is expected as 99 in the output.
    assert entry.A.D[0] == 99


def test_nested_outline():
    """Check that a bookmark nested in another becomes a child outline entry.

    'Chapter 2' (target '#100') contains 'Chapter 2.1' (target '#200'); both
    levels are expected to report a count of one and destinations 99 and 199.
    """
    subsection = sexpr.ListExpression([
        sexpr.StringExpression(b'Chapter 2.1'),
        sexpr.StringExpression(b'#200'),
    ])
    chapter = sexpr.ListExpression([
        sexpr.StringExpression(b'Chapter 2'),
        sexpr.StringExpression(b'#100'),
        subsection,
    ])
    tree = sexpr.ListExpression([
        sexpr.SymbolExpression(sexpr.Symbol('bookmarks')),
        chapter,
    ])

    outline = OutlineTransformVisitor().visit(tree)

    assert outline.Count == 1
    top_entry = outline.First
    assert top_entry.Count == 1
    # The page number of chapter 2
    assert top_entry.A.D[0] == 99
    # The page number of chapter 2.1
    assert top_entry.First.A.D[0] == 199


# Sometimes the outline refers to pages by title rather than by number, which our libdjvu bindings do not support.
# We ignore such entries since there is not much we can do in this case.
# See https://github.com/kcroker/dpsprep/issues/23
def test_outline_with_page_titles():
    """Check that bookmarks whose targets are page titles are all dropped.

    None of the targets below has the '#<number>' form, so the visitor's
    result is expected to compare equal to an empty IndirectPdfDict.
    """
    titled_targets = [
        (b'Preface', b'#f007.djvu'),
        (b'Contents', b'#f011.djvu'),
        (b'0 Prologue', b'#p001.djvu'),
    ]
    entries = [
        sexpr.ListExpression([
            sexpr.StringExpression(title),
            sexpr.StringExpression(target),
        ])
        for title, target in titled_targets
    ]
    tree = sexpr.ListExpression([
        sexpr.SymbolExpression(sexpr.Symbol('bookmarks')),
        *entries,
    ])

    outline = OutlineTransformVisitor().visit(tree)

    assert outline == IndirectPdfDict()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dpsprep"
version = "2.2.2"
version = "2.2.3"
description = "A DjVu to PDF converter with a focus on small output size and the ability to preserve document outlines and text layers"
authors = ["Kevin Arthur Schiff Croker", "Ianis Vasilev"]
license = "GPL-3.0-or-later"
Expand Down

0 comments on commit 3d4c8c3

Please sign in to comment.