Skip to content

Commit

Permalink
Ignore unrecognized page titles in the outline
Browse files Browse the repository at this point in the history
Issue #23
  • Loading branch information
v-- committed Dec 9, 2023
1 parent 5ee6667 commit 9f0d832
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 1 deletion.
8 changes: 7 additions & 1 deletion dpsprep/outline.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from loguru import logger
from pdfrw import PdfName, PdfDict, IndirectPdfDict
import djvu.sexpr

Expand All @@ -10,7 +11,12 @@ class OutlineTransformVisitor(SExpressionVisitor):
def visit_plain_list(self, node: djvu.sexpr.StringExpression, parent: IndirectPdfDict):
title, page, *rest = node
# I have experimentally determined that we need to translate page indices. -- Ianis, 2023-05-03
page_number = int(page.value[1:]) - 1
try:
page_number = int(page.value[1:]) - 1
except ValueError:
# As far as I understand, python-djvulibre doesn't support Djvu's page titles. -- Ianis, 2023-12-09
logger.warning(f'Could not determine page number from the page title {page.value}.')
return

bookmark = IndirectPdfDict(
Parent = parent,
Expand Down
67 changes: 67 additions & 0 deletions dpsprep/test_outline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from djvu import sexpr
from pdfrw import IndirectPdfDict

from .outline import OutlineTransformVisitor


def test_basic_outline():
    """A single flat bookmark becomes exactly one outline entry.

    The bookmark targets '#100'; the resulting destination is expected to
    carry the index 99.
    """
    header = sexpr.SymbolExpression(sexpr.Symbol('bookmarks'))
    chapter = sexpr.ListExpression([
        sexpr.StringExpression(b'Chapter 2'),
        sexpr.StringExpression(b'#100'),
    ])
    tree = sexpr.ListExpression([header, chapter])

    outline = OutlineTransformVisitor().visit(tree)
    entry = outline.First

    assert outline.Count == 1
    assert entry.Title == 'Chapter 2'
    assert entry.A.D[0] == 99  # The page number


def test_nested_outline():
    """A bookmark nested inside another ends up as a child outline entry.

    Both levels are checked for their entry count and for the destination
    index derived from their '#<number>' target.
    """
    # Build the fixture inside-out: the sub-chapter first, then its parent.
    subchapter = sexpr.ListExpression([
        sexpr.StringExpression(b'Chapter 2.1'),
        sexpr.StringExpression(b'#200'),
    ])
    chapter = sexpr.ListExpression([
        sexpr.StringExpression(b'Chapter 2'),
        sexpr.StringExpression(b'#100'),
        subchapter,
    ])
    tree = sexpr.ListExpression([
        sexpr.SymbolExpression(sexpr.Symbol('bookmarks')),
        chapter,
    ])

    outline = OutlineTransformVisitor().visit(tree)
    parent_entry = outline.First

    assert outline.Count == 1
    assert parent_entry.Count == 1
    assert parent_entry.A.D[0] == 99  # The page number of chapter 2
    assert parent_entry.First.A.D[0] == 199  # The page number of chapter 2.1


# Sometimes the page numbers are instead page titles, which our libdjvu bindings do not support
# We ignore them since there is not much we can do in this case
# See https://github.com/kcroker/dpsprep/issues/23
def test_outline_with_page_titles():
    """Bookmarks that point at page titles instead of page numbers are dropped.

    Every entry in the fixture uses a non-numeric target, so the visitor is
    expected to return something equal to an empty ``IndirectPdfDict``.
    """
    # (title, target) pairs; none of the targets is of the form '#<number>'.
    raw_entries = (
        (b'Preface', b'#f007.djvu'),
        (b'Contents', b'#f011.djvu'),
        (b'0 Prologue', b'#p001.djvu'),
    )
    entries = [
        sexpr.ListExpression([
            sexpr.StringExpression(title),
            sexpr.StringExpression(target),
        ])
        for title, target in raw_entries
    ]
    tree = sexpr.ListExpression([
        sexpr.SymbolExpression(sexpr.Symbol('bookmarks')),
        *entries,
    ])

    outline = OutlineTransformVisitor().visit(tree)

    assert outline == IndirectPdfDict()

0 comments on commit 9f0d832

Please sign in to comment.