about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yuri Victorovich <yuri@FreeBSD.org> 2025-10-17 06:50:24 +0000
committer: Yuri Victorovich <yuri@FreeBSD.org> 2025-10-17 08:01:38 +0000
commit: 6233720823f005302405de245f5b4abed1f0f4a8 (patch)
tree: f9ac05e3a17b887ee0a9836c188fc53f6c6b08d9
parent: 8f89ff1cae6e61b0362cce580be1f710cc205b06 (diff)
textproc/py-html-text: New port: Extract text from HTML
-rw-r--r-- textproc/Makefile 1
-rw-r--r-- textproc/py-html-text/Makefile 26
-rw-r--r-- textproc/py-html-text/distinfo 3
-rw-r--r-- textproc/py-html-text/pkg-descr 7
4 files changed, 37 insertions, 0 deletions
diff --git a/textproc/Makefile b/textproc/Makefile
index 2c582e0f6f12..5dc56ee45dd4 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -1414,6 +1414,7 @@
SUBDIR += py-hexdump
SUBDIR += py-hieroglyph
SUBDIR += py-hjson
+ SUBDIR += py-html-text
SUBDIR += py-html2json
SUBDIR += py-html2text
SUBDIR += py-htmlmin2
diff --git a/textproc/py-html-text/Makefile b/textproc/py-html-text/Makefile
new file mode 100644
index 000000000000..35cd07a93c3d
--- /dev/null
+++ b/textproc/py-html-text/Makefile
@@ -0,0 +1,26 @@
+PORTNAME= html-text
+DISTVERSION= 0.7.1
+CATEGORIES= textproc python
+MASTER_SITES= PYPI
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+DISTNAME= ${PORTNAME:S/-/_/}-${DISTVERSION}
+
+MAINTAINER= ports@FreeBSD.org
+COMMENT= Extract text from HTML
+WWW= https://github.com/zytedata/html-text
+
+LICENSE= MIT
+LICENSE_FILE= ${WRKSRC}/LICENSE
+
+BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}hatchling>=1.27.0:devel/py-hatchling@${PY_FLAVOR}
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}lxml-html-clean>0:devel/py-lxml-html-clean@${PY_FLAVOR}
+
+USES= python
+USE_PYTHON= pep517 autoplist pytest
+
+NO_ARCH= yes
+
+# tests as of 0.7.1: 58 passed, 4 skipped, 1 xfailed, 5 errors in 4.68s
+
+.include <bsd.port.mk>
diff --git a/textproc/py-html-text/distinfo b/textproc/py-html-text/distinfo
new file mode 100644
index 000000000000..6355c5461e17
--- /dev/null
+++ b/textproc/py-html-text/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1760676184
+SHA256 (html_text-0.7.1.tar.gz) = 87fd194310a9f54be32c7b18a70180dfa72a6b4d01cca35ac813c4d2b2b2ed8b
+SIZE (html_text-0.7.1.tar.gz) = 54423
diff --git a/textproc/py-html-text/pkg-descr b/textproc/py-html-text/pkg-descr
new file mode 100644
index 000000000000..3ded2dd0baf6
--- /dev/null
+++ b/textproc/py-html-text/pkg-descr
@@ -0,0 +1,7 @@
+Extract text from HTML.
+
+html_text is a library for extracting text from HTML, with a few handy
+features:
+- It removes leading and trailing whitespace
+- It handles HTML entities
+- It uses lxml for parsing