diff options
| author | Yuri Victorovich <yuri@FreeBSD.org> | 2025-10-17 06:50:24 +0000 |
|---|---|---|
| committer | Yuri Victorovich <yuri@FreeBSD.org> | 2025-10-17 08:01:38 +0000 |
| commit | 6233720823f005302405de245f5b4abed1f0f4a8 (patch) | |
| tree | f9ac05e3a17b887ee0a9836c188fc53f6c6b08d9 | |
| parent | 8f89ff1cae6e61b0362cce580be1f710cc205b06 (diff) | |
textproc/py-html-text: New port: Extract text from HTML
| -rw-r--r-- | textproc/Makefile | 1 | ||||
| -rw-r--r-- | textproc/py-html-text/Makefile | 26 | ||||
| -rw-r--r-- | textproc/py-html-text/distinfo | 3 | ||||
| -rw-r--r-- | textproc/py-html-text/pkg-descr | 7 |
4 files changed, 37 insertions, 0 deletions
diff --git a/textproc/Makefile b/textproc/Makefile index 2c582e0f6f12..5dc56ee45dd4 100644 --- a/textproc/Makefile +++ b/textproc/Makefile @@ -1414,6 +1414,7 @@ SUBDIR += py-hexdump SUBDIR += py-hieroglyph SUBDIR += py-hjson + SUBDIR += py-html-text SUBDIR += py-html2json SUBDIR += py-html2text SUBDIR += py-htmlmin2 diff --git a/textproc/py-html-text/Makefile b/textproc/py-html-text/Makefile new file mode 100644 index 000000000000..35cd07a93c3d --- /dev/null +++ b/textproc/py-html-text/Makefile @@ -0,0 +1,26 @@ +PORTNAME= html-text +DISTVERSION= 0.7.1 +CATEGORIES= textproc python +MASTER_SITES= PYPI +PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} +DISTNAME= ${PORTNAME:S/-/_/}-${DISTVERSION} + +MAINTAINER= ports@FreeBSD.org +COMMENT= Extract text from HTML +WWW= https://github.com/zytedata/html-text + +LICENSE= MIT +LICENSE_FILE= ${WRKSRC}/LICENSE + +BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}hatchling>=1.27.0:devel/py-hatchling@${PY_FLAVOR} +RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}lxml-html-clean>0:devel/py-lxml-html-clean@${PY_FLAVOR} + +USES= python +USE_PYTHON= pep517 autoplist pytest + +NO_ARCH= yes + +# tests as of 0.7.1: 58 passed, 4 skipped, 1 xfailed, 5 errors in 4.68s + +.include <bsd.port.mk> diff --git a/textproc/py-html-text/distinfo b/textproc/py-html-text/distinfo new file mode 100644 index 000000000000..6355c5461e17 --- /dev/null +++ b/textproc/py-html-text/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1760676184 +SHA256 (html_text-0.7.1.tar.gz) = 87fd194310a9f54be32c7b18a70180dfa72a6b4d01cca35ac813c4d2b2b2ed8b +SIZE (html_text-0.7.1.tar.gz) = 54423 diff --git a/textproc/py-html-text/pkg-descr b/textproc/py-html-text/pkg-descr new file mode 100644 index 000000000000..3ded2dd0baf6 --- /dev/null +++ b/textproc/py-html-text/pkg-descr @@ -0,0 +1,7 @@ +Extract text from HTML. + +html_text is a library for extracting text from HTML, with a few handy +features: +- It removes leading and trailing whitespace +- It handles HTML entities +- It uses lxml for parsing |
