about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Po-Chuan Hsieh <sunpoet@FreeBSD.org> 2024-02-21 14:13:47 +0000
committer Po-Chuan Hsieh <sunpoet@FreeBSD.org> 2024-02-21 15:06:10 +0000
commit 0532aa392a247337f8afcc9c58025347d9fff64c (patch)
tree 024c5cfddea859c068ed7608c31707b43ebbbce9
parent 2ef9cc85d65db78284d96b702f1325e733ded4e8 (diff)
download ports-0532aa392a247337f8afcc9c58025347d9fff64c.tar.gz
ports-0532aa392a247337f8afcc9c58025347d9fff64c.zip
www/py-htmldate: Add py-htmldate 1.7.0
htmldate finds original and updated publication dates of any web page. From the command-line or within Python, all the steps needed from web page download to HTML parsing, scraping, and text analysis are included.
-rw-r--r-- www/Makefile 1
-rw-r--r-- www/py-htmldate/Makefile 26
-rw-r--r-- www/py-htmldate/distinfo 3
-rw-r--r-- www/py-htmldate/files/patch-setup.py 11
-rw-r--r-- www/py-htmldate/pkg-descr 3
5 files changed, 44 insertions, 0 deletions
diff --git a/www/Makefile b/www/Makefile
index 44c67c184f35..8d99027b44c3 100644
--- a/www/Makefile
+++ b/www/Makefile
@@ -1721,6 +1721,7 @@
SUBDIR += py-html3
SUBDIR += py-html5-parser
SUBDIR += py-html5lib
+ SUBDIR += py-htmldate
SUBDIR += py-httmock
SUBDIR += py-http-parser
SUBDIR += py-httpbin
diff --git a/www/py-htmldate/Makefile b/www/py-htmldate/Makefile
new file mode 100644
index 000000000000..2e44731e6ea0
--- /dev/null
+++ b/www/py-htmldate/Makefile
@@ -0,0 +1,26 @@
+PORTNAME= htmldate
+PORTVERSION= 1.7.0
+CATEGORIES= www python
+MASTER_SITES= PYPI
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER= sunpoet@FreeBSD.org
+COMMENT= Fast and robust extraction of publication dates from URLs and web pages
+WWW= https://htmldate.readthedocs.io/en/latest/ \
+ https://github.com/adbar/htmldate
+
+LICENSE= GPLv3+
+LICENSE_FILE= ${WRKSRC}/LICENSE
+
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}charset-normalizer>=3.3.2:textproc/py-charset-normalizer@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}dateparser>=1.1.2:devel/py-dateparser@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}lxml>=4.9.3<6:devel/py-lxml@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}dateutil>=2.8.2:devel/py-dateutil@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}urllib3>=1.26,1<3,1:net/py-urllib3@${PY_FLAVOR}
+
+USES= python
+USE_PYTHON= autoplist concurrent distutils
+
+NO_ARCH= yes
+
+.include <bsd.port.mk>
diff --git a/www/py-htmldate/distinfo b/www/py-htmldate/distinfo
new file mode 100644
index 000000000000..010b81ad6da7
--- /dev/null
+++ b/www/py-htmldate/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1708448862
+SHA256 (htmldate-1.7.0.tar.gz) = 02a800dd224cbf74bf483b042f64e14f57ba0e40c6b4404b284e98bc6c30b68d
+SIZE (htmldate-1.7.0.tar.gz) = 53992
diff --git a/www/py-htmldate/files/patch-setup.py b/www/py-htmldate/files/patch-setup.py
new file mode 100644
index 000000000000..1a2e3831b98c
--- /dev/null
+++ b/www/py-htmldate/files/patch-setup.py
@@ -0,0 +1,11 @@
+--- setup.py.orig 2024-01-17 16:57:16 UTC
++++ setup.py
+@@ -120,7 +120,7 @@ setup(
+ "dateparser >= 1.1.2", # 1.1.3+ slower
+ # see tests on Github Actions
+ "lxml == 4.9.2; platform_system == 'Darwin' and python_version <= '3.8'",
+- "lxml >= 4.9.4, < 6; platform_system != 'Darwin' or python_version > '3.8'",
++ "lxml >= 4.9.3, < 6; platform_system != 'Darwin' or python_version > '3.8'",
+ "python-dateutil >= 2.8.2",
+ "urllib3 >= 1.26, < 2; python_version < '3.7'",
+ "urllib3 >= 1.26, < 3; python_version >= '3.7'",
diff --git a/www/py-htmldate/pkg-descr b/www/py-htmldate/pkg-descr
new file mode 100644
index 000000000000..0d27dc7f96f5
--- /dev/null
+++ b/www/py-htmldate/pkg-descr
@@ -0,0 +1,3 @@
+htmldate finds original and updated publication dates of any web page. From the
+command-line or within Python, all the steps needed from web page download to
+HTML parsing, scraping, and text analysis are included.