aboutsummaryrefslogtreecommitdiff
path: root/www/jericho-html
diff options
context:
space:
mode:
authorMartin Wilke <miwi@FreeBSD.org>2008-07-19 17:02:19 +0000
committerMartin Wilke <miwi@FreeBSD.org>2008-07-19 17:02:19 +0000
commit8d9582086cb7b38d3bb10cce45af85398de7aaa5 (patch)
treea9291206c0475ed92636bb3d1b8d9e247a279bb9 /www/jericho-html
parent48c8d2e494f8fd09aa8cf70a56255c0223067800 (diff)
downloadports-8d9582086cb7b38d3bb10cce45af85398de7aaa5.tar.gz
ports-8d9582086cb7b38d3bb10cce45af85398de7aaa5.zip
Jericho HTML Parser is a simple but powerful Java library allowing
analysis and manipulation of parts of an HTML document, including some
common server-side tags, while reproducing verbatim any unrecognised
or invalid HTML. It also provides high-level HTML form manipulation
functions.

WWW: http://jerichohtml.sourceforge.net/doc/index.html

PR: ports/124770
Submitted by: Marcin Cieslak <saper at SYSTEM.PL>
Notes
Notes: svn path=/head/; revision=217126
Diffstat (limited to 'www/jericho-html')
-rw-r--r--www/jericho-html/Makefile52
-rw-r--r--www/jericho-html/distinfo3
-rw-r--r--www/jericho-html/files/patch-encoding15
-rw-r--r--www/jericho-html/pkg-descr8
4 files changed, 78 insertions, 0 deletions
diff --git a/www/jericho-html/Makefile b/www/jericho-html/Makefile
new file mode 100644
index 000000000000..4dc195625119
--- /dev/null
+++ b/www/jericho-html/Makefile
@@ -0,0 +1,52 @@
+# New ports collection makefile for: jericho-html
+# Date created: 2008-06-17
+# Whom: Marcin Cieslak <saper@SYSTEM.PL>
+#
+# $FreeBSD$
+#
+
+PORTNAME=	jericho-html
+PORTVERSION=	2.5
+CATEGORIES=	www java
+MASTER_SITES=	SF
+MASTER_SITE_SUBDIR=	${PORTNAME:S/-//}
+
+MAINTAINER=	saper@SYSTEM.PL
+COMMENT=	A java library to analyse and manipulate HTML
+
+USE_ZIP=	yes
+USE_JAVA=	1.3+
+# Classpath for javac/javadoc, relative to ${WRKSRC}; users strip the continuation blanks.
+INTERFACES=	"compile-time-dependencies/slf4j-api-1.4.3.jar:\
+		compile-time-dependencies/commons-logging-api-1.1.jar:\
+		compile-time-dependencies/log4j-api-1.2.14.jar"
+
+PORTDOCS=	api
+PLIST_FILES+=	%%JAVAJARDIR%%/${PORTNAME}.jar
+
+# Compile the sources against the jars in INTERFACES, pack classes/ into
+# lib/${PORTNAME}.jar and, unless NOPORTDOCS is defined, rebuild doc/api.
+do-build:
+	(cd ${WRKSRC} && ${RM} -rf classes/* && ${JAVAC} \
+		-classpath ${INTERFACES:S, ,,g} -d classes \
+		src/java/au/id/jericho/lib/html/*.java \
+		src/java/au/id/jericho/lib/html/nodoc/*.java)
+	${JAR} -cf ${WRKSRC}/lib/${PORTNAME}.jar -C ${WRKSRC}/classes .
+.if !defined(NOPORTDOCS)
+	(cd ${WRKSRC} && ${RM} -rf doc/* && ${JAVADOC} -quiet \
+		-windowtitle "Jericho HTML Parser ${PORTVERSION}" \
+		-classpath ${INTERFACES:S, ,,g}:src/java:classes \
+		-use -d ${WRKSRC}/doc/api \
+		-subpackages au.id.jericho.lib.html -exclude au.id.jericho.lib.html.nodoc \
+		-noqualifier au.id.jericho.lib.html \
+		-group "Core package" au.id.jericho.lib.html)
+.endif
+
+# Install the jar built by do-build into ${JAVAJARDIR}; copy doc/api to ${DOCSDIR} unless NOPORTDOCS is defined.
+do-install:
+	${INSTALL_DATA} ${WRKSRC}/lib/${PORTNAME}.jar ${JAVAJARDIR}
+.if !defined(NOPORTDOCS)
+	${MKDIR} ${DOCSDIR} && (cd ${WRKSRC}/doc && ${FIND} api | ${CPIO} -pdmu ${DOCSDIR})
+.endif
+
+.include <bsd.port.mk>
diff --git a/www/jericho-html/distinfo b/www/jericho-html/distinfo
new file mode 100644
index 000000000000..280531763604
--- /dev/null
+++ b/www/jericho-html/distinfo
@@ -0,0 +1,3 @@
+MD5 (jericho-html-2.5.zip) = 64306d0eb82608e50496a680b319182d
+SHA256 (jericho-html-2.5.zip) = 212b9e8b72f9787dfafd046e8716f0d04365afcd3f4d2fb293e69d5b90e456b4
+SIZE (jericho-html-2.5.zip) = 1456664
diff --git a/www/jericho-html/files/patch-encoding b/www/jericho-html/files/patch-encoding
new file mode 100644
index 000000000000..3396a1440a73
--- /dev/null
+++ b/www/jericho-html/files/patch-encoding
@@ -0,0 +1,15 @@
+--- src/java/au/id/jericho/lib/html/StreamEncodingDetector.java.orig 2008-06-17 21:01:53.890292905 +0200
++++ src/java/au/id/jericho/lib/html/StreamEncodingDetector.java 2008-06-17 21:02:43.940300330 +0200
+@@ -203,9 +203,9 @@
+ // Assume the more likely case of four 8-bit characters <= U+00FF.
+ // Check whether it fits some common EBCDIC strings that might be found at the start of a document:
+ if (b1==0x4C) { // first character is EBCDIC '<' (ASCII 'L'), check a couple more characters before assuming EBCDIC encoding:
+- if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC ("Lo§”" in Windows-1252)
+- if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC ("LZÄÖ" in Windows-1252)
+- if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" ("LÈãÔ" in Windows-1252), or "<htm" ("Lˆ£”" in Windows-1252)
++ if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC
++ if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC
++ if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" or "<htm"
+ // although this is not an exhaustive check for EBCDIC, it is safer to assume a more common preliminary encoding if none of these conditions are met.
+ }
+ // Now confident that it is not EBCDIC, but some other 8-bit encoding.
diff --git a/www/jericho-html/pkg-descr b/www/jericho-html/pkg-descr
new file mode 100644
index 000000000000..b38a2d771bd5
--- /dev/null
+++ b/www/jericho-html/pkg-descr
@@ -0,0 +1,8 @@
+Jericho HTML Parser is a simple but powerful Java library allowing
+analysis and manipulation of parts of an HTML document, including
+some common server-side tags, while reproducing verbatim any
+unrecognised or invalid HTML.
+
+It also provides high-level HTML form manipulation functions.
+
+WWW: http://jerichohtml.sourceforge.net/doc/index.html