diff options
Diffstat (limited to 'textproc/py-ocrmypdf')
| -rw-r--r-- | textproc/py-ocrmypdf/Makefile | 7 | ||||
| -rw-r--r-- | textproc/py-ocrmypdf/distinfo | 6 | ||||
| -rw-r--r-- | textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py | 66 |
3 files changed, 73 insertions, 6 deletions
diff --git a/textproc/py-ocrmypdf/Makefile b/textproc/py-ocrmypdf/Makefile index faba4fd54e22..157d71bad57d 100644 --- a/textproc/py-ocrmypdf/Makefile +++ b/textproc/py-ocrmypdf/Makefile @@ -1,5 +1,5 @@ PORTNAME= ocrmypdf -DISTVERSION= 16.10.4 +DISTVERSION= 16.11.1 CATEGORIES= textproc python MASTER_SITES= PYPI PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} @@ -28,12 +28,13 @@ TEST_DEPENDS= ${PYTHON_PKGNAMEPREFIX}hypothesis>=6.36.0:devel/py-hypothesis@${PY ${PYTHON_PKGNAMEPREFIX}python-xmp-toolkit>=2.0.1:textproc/py-python-xmp-toolkit@${PY_FLAVOR} \ ${PYTHON_PKGNAMEPREFIX}reportlab>=3.6.8:print/py-reportlab@${PY_FLAVOR} -USES= ghostscript:run python:3.10+ shebangfix +USES= ghostscript:run python shebangfix USE_PYTHON= autoplist concurrent pep517 pytest # Skip some checks as they yield wrong results if run with the root account +# "test_watcher" requires additional deps used by the "watcher" feature PYTEST_IGNORED_TESTS= test_chmod \ test_input_file_not_readable \ - test_malformed_docinfo # leads to an internal pytest error + test_watcher SHEBANG_FILES= src/ocrmypdf/__main__.py \ src/ocrmypdf/pdfinfo/__init__.py diff --git a/textproc/py-ocrmypdf/distinfo b/textproc/py-ocrmypdf/distinfo index ff8097787e66..582ec949cdca 100644 --- a/textproc/py-ocrmypdf/distinfo +++ b/textproc/py-ocrmypdf/distinfo @@ -1,3 +1,3 @@ -TIMESTAMP = 1753074854 -SHA256 (ocrmypdf-16.10.4.tar.gz) = de749ef5f554b63d57e68d032e7cba5500cbd5030835bf24f658f7b7a04f3dc1 -SIZE (ocrmypdf-16.10.4.tar.gz) = 7003649 +TIMESTAMP = 1763048154 +SHA256 (ocrmypdf-16.11.1.tar.gz) = 838ab69e0ee0f04feea0d5861a17badecab6d3beaed0e29a97058eadda58cbb1 +SIZE (ocrmypdf-16.11.1.tar.gz) = 7015278 diff --git a/textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py b/textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py new file mode 100644 index 000000000000..34e6453d57df --- /dev/null +++ b/textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py @@ -0,0 +1,66 @@ +From: "James R. Barlow" <james@purplerock.ca> +Date: Sun, 9 Nov 2025 15:43:36 -0800 +Subject: [PATCH] Work around Ghostscript 10.6.0 JPEG encoding issue by forcing + optimization. + +Not an ideal fix, but it improves an issue affecting numerous users. + +Fixes 1585. + +Obtained from: + +https://github.com/ocrmypdf/OCRmyPDF/commit/f4c6c8121ba8178ff3a1cb8f70037bbc3a31391b.patch + +--- src/ocrmypdf/optimize.py.orig 2020-02-02 00:00:00 UTC ++++ src/ocrmypdf/optimize.py +@@ -17,6 +17,7 @@ import img2pdf + from zlib import compress + + import img2pdf ++from packaging.version import Version + from pikepdf import ( + Dictionary, + Name, +@@ -32,7 +33,7 @@ from ocrmypdf._concurrent import Executor, SerialExecu + from PIL import Image + + from ocrmypdf._concurrent import Executor, SerialExecutor +-from ocrmypdf._exec import jbig2enc, pngquant ++from ocrmypdf._exec import ghostscript, jbig2enc, pngquant + from ocrmypdf._jobcontext import PdfContext + from ocrmypdf._progressbar import ProgressBar + from ocrmypdf.exceptions import OutputFileAccessError +@@ -189,6 +190,16 @@ def extract_image_jbig2( + return None + + ++def _should_optimize_jpeg(options, filtdp): ++ if options.optimize >= 2: ++ return True ++ if options.optimize < 2 and ghostscript.version() >= Version('10.6.0'): ++ # Ghostscript 10.6.0+ introduced some sort of JPEG encoding issue. ++ # To resolve this, re-optimize the JPEG anyway. ++ return True ++ return False ++ ++ + def extract_image_generic( + *, pdf: Pdf, root: Path, image: Stream, xref: Xref, options + ) -> XrefExt | None: +@@ -202,15 +213,7 @@ def extract_image_generic( + if pim.bits_per_component == 1: + return None + +- if filtdp[0] == Name.DCTDecode and options.optimize >= 2: +- # This is a simple heuristic derived from some training data, that has +- # about a 70% chance of guessing whether the JPEG is high quality, +- # and possibly recompressible, or not. The number itself doesn't mean +- # anything. +- # bytes_per_pixel = int(raw_jpeg.Length) / (w * h) +- # jpeg_quality_estimate = 117.0 * (bytes_per_pixel ** 0.213) +- # if jpeg_quality_estimate < 65: +- # return None ++ if filtdp[0] == Name.DCTDecode and _should_optimize_jpeg(options, filtdp): + try: + imgname = root / f'{xref:08d}' + with imgname.open('wb') as f: |
