summary | refs | log | tree | commit | diff
path: root/www/htmlcxx
diff options
context:
space:
mode:
author wiz <wiz@pkgsrc.org> 2014-02-16 22:58:51 +0000
committer wiz <wiz@pkgsrc.org> 2014-02-16 22:58:51 +0000
commit c7cd6409088db0ec4f20f7b297deb0e187635d79 (patch)
tree 3b311d1307f107d671922b85898f92fd11ca26e4 /www/htmlcxx
parent a3a2a0953bbded621d965b3d4d2af026039fdbba (diff)
download pkgsrc-c7cd6409088db0ec4f20f7b297deb0e187635d79.tar.gz
Import htmlcxx-0.85 as www/htmlcxx.
htmlcxx is a simple non-validating CSS1 and HTML parser for C++. Although there are several other HTML parsers available, htmlcxx has some characteristics that make it unique:
* STL like navigation of DOM tree, using the excellent tree.hh library from Kasper Peeters
* It is possible to reproduce exactly, character by character, the original document from the parse tree
* Bundled css parser
* Optional parsing of attributes
* C++ code that looks like C++ (not so true anymore)
* Offsets of tags/elements in the original document are stored in the nodes of the DOM tree
The parsing policies of htmlcxx were created trying to mimic Mozilla Firefox behavior. So you should expect parse trees similar to those created by Firefox. However, differently from Firefox, htmlcxx does not insert non-existent stuff in your html. Therefore, serializing the DOM tree gives exactly the same bytes contained in the original HTML document.
Diffstat (limited to 'www/htmlcxx')
-rw-r--r-- www/htmlcxx/DESCR 20
-rw-r--r-- www/htmlcxx/Makefile 19
-rw-r--r-- www/htmlcxx/PLIST 20
-rw-r--r-- www/htmlcxx/buildlink3.mk 12
-rw-r--r-- www/htmlcxx/distinfo 7
-rw-r--r-- www/htmlcxx/patches/patch-html_CharsetConverter.cc 15
-rw-r--r-- www/htmlcxx/patches/patch-html_ci__string.h 18
7 files changed, 111 insertions, 0 deletions
diff --git a/www/htmlcxx/DESCR b/www/htmlcxx/DESCR
new file mode 100644
index 00000000000..d877e942d19
--- /dev/null
+++ b/www/htmlcxx/DESCR
@@ -0,0 +1,20 @@
+htmlcxx is a simple non-validating CSS1 and HTML parser for C++.
+Although there are several other HTML parsers available, htmlcxx
+has some characteristics that make it unique:
+
+* STL like navigation of DOM tree, using the excellent tree.hh library
+ from Kasper Peeters
+* It is possible to reproduce exactly, character by character, the
+ original document from the parse tree
+* Bundled css parser
+* Optional parsing of attributes
+* C++ code that looks like C++ (not so true anymore)
+* Offsets of tags/elements in the original document are stored in
+ the nodes of the DOM tree
+
+The parsing policies of htmlcxx were created trying to mimic Mozilla
+Firefox behavior. So you should expect parse trees similar to those
+created by Firefox. However, differently from Firefox, htmlcxx does
+not insert non-existent stuff in your html. Therefore, serializing
+the DOM tree gives exactly the same bytes contained in the original
+HTML document.
diff --git a/www/htmlcxx/Makefile b/www/htmlcxx/Makefile
new file mode 100644
index 00000000000..c648e62747c
--- /dev/null
+++ b/www/htmlcxx/Makefile
@@ -0,0 +1,19 @@
+# $NetBSD: Makefile,v 1.1 2014/02/16 22:58:51 wiz Exp $
+
+DISTNAME= htmlcxx-0.85
+CATEGORIES= www
+MASTER_SITES= ${MASTER_SITE_SOURCEFORGE:=htmlcxx/}
+
+MAINTAINER= pkgsrc-users@NetBSD.org
+HOMEPAGE= http://htmlcxx.sourceforge.net/
+COMMENT= Simple non-validating CSS1 and HTML parser for C++
+LICENSE= gnu-lgpl-v2 AND apache-2.0
+
+GNU_CONFIGURE= yes
+USE_LIBTOOL= yes
+USE_TOOLS+= pkg-config
+USE_LANGUAGES= c c++
+
+PKGCONFIG_OVERRIDE+= htmlcxx.pc.in
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/www/htmlcxx/PLIST b/www/htmlcxx/PLIST
new file mode 100644
index 00000000000..0508026fe1c
--- /dev/null
+++ b/www/htmlcxx/PLIST
@@ -0,0 +1,20 @@
+@comment $NetBSD: PLIST,v 1.1 2014/02/16 22:58:51 wiz Exp $
+bin/htmlcxx
+include/htmlcxx/css/parser.h
+include/htmlcxx/css/parser_pp.h
+include/htmlcxx/html/CharsetConverter.h
+include/htmlcxx/html/Extensions.h
+include/htmlcxx/html/Node.h
+include/htmlcxx/html/ParserDom.h
+include/htmlcxx/html/ParserSax.h
+include/htmlcxx/html/ParserSax.tcc
+include/htmlcxx/html/Uri.h
+include/htmlcxx/html/ci_string.h
+include/htmlcxx/html/tld.h
+include/htmlcxx/html/tree.h
+include/htmlcxx/html/utils.h
+lib/libcss_parser.la
+lib/libcss_parser_pp.la
+lib/libhtmlcxx.la
+lib/pkgconfig/htmlcxx.pc
+share/htmlcxx/css/default.css
diff --git a/www/htmlcxx/buildlink3.mk b/www/htmlcxx/buildlink3.mk
new file mode 100644
index 00000000000..3fb5b4934cd
--- /dev/null
+++ b/www/htmlcxx/buildlink3.mk
@@ -0,0 +1,12 @@
+# $NetBSD: buildlink3.mk,v 1.1 2014/02/16 22:58:51 wiz Exp $
+
+BUILDLINK_TREE+= htmlcxx
+
+.if !defined(HTMLCXX_BUILDLINK3_MK)
+HTMLCXX_BUILDLINK3_MK:=
+
+BUILDLINK_API_DEPENDS.htmlcxx+= htmlcxx>=0.85
+BUILDLINK_PKGSRCDIR.htmlcxx?= ../../www/htmlcxx
+.endif # HTMLCXX_BUILDLINK3_MK
+
+BUILDLINK_TREE+= -htmlcxx
diff --git a/www/htmlcxx/distinfo b/www/htmlcxx/distinfo
new file mode 100644
index 00000000000..78c7b66d162
--- /dev/null
+++ b/www/htmlcxx/distinfo
@@ -0,0 +1,7 @@
+$NetBSD: distinfo,v 1.1 2014/02/16 22:58:51 wiz Exp $
+
+SHA1 (htmlcxx-0.85.tar.gz) = e56fef830db51041fd297d269d24379b2dccb928
+RMD160 (htmlcxx-0.85.tar.gz) = d357b4c29127aae7f06da666c004c9db26ef29a4
+Size (htmlcxx-0.85.tar.gz) = 414624 bytes
+SHA1 (patch-html_CharsetConverter.cc) = 72204f93fa3639328b73c5bed1841f23c32946c8
+SHA1 (patch-html_ci__string.h) = 71438e4eea290f5934918d1996d0c80eefd5ffe5
diff --git a/www/htmlcxx/patches/patch-html_CharsetConverter.cc b/www/htmlcxx/patches/patch-html_CharsetConverter.cc
new file mode 100644
index 00000000000..72dc5fa8a12
--- /dev/null
+++ b/www/htmlcxx/patches/patch-html_CharsetConverter.cc
@@ -0,0 +1,15 @@
+$NetBSD: patch-html_CharsetConverter.cc,v 1.1 2014/02/16 22:58:51 wiz Exp $
+
+Fix build on NetBSD.
+
+--- html/CharsetConverter.cc.orig 2011-05-15 17:32:10.000000000 +0000
++++ html/CharsetConverter.cc
+@@ -37,7 +37,7 @@ string CharsetConverter::convert(const s
+
+ size_t ret;
+ while (1) {
+- ret = iconv(mIconvDescriptor, const_cast<char**>(&inbuf), &inbytesleft, &outbuf, &outbytesleft);
++ ret = iconv(mIconvDescriptor, (const char**)(&inbuf), &inbytesleft, &outbuf, &outbytesleft);
+ if (ret == 0) break;
+ if (ret == (size_t)-1 && errno == E2BIG) return string();
+
diff --git a/www/htmlcxx/patches/patch-html_ci__string.h b/www/htmlcxx/patches/patch-html_ci__string.h
new file mode 100644
index 00000000000..d31923e0298
--- /dev/null
+++ b/www/htmlcxx/patches/patch-html_ci__string.h
@@ -0,0 +1,18 @@
+$NetBSD: patch-html_ci__string.h,v 1.1 2014/02/16 22:58:51 wiz Exp $
+
+Fix build with clang, from
+https://github.com/dhoerl/htmlcxx/
+
+--- html/ci_string.h.orig 2011-02-13 22:24:53.000000000 +0000
++++ html/ci_string.h
+@@ -4,7 +4,9 @@
+ #include <cctype>
+ #include <string>
+
+-#if __GNUC__ >= 3
++#if __clang_major__ >= 3
++struct ci_char_traits : public std::char_traits<char>
++#elif __GNUC__ >= 3
+ #include <bits/char_traits.h>
+ struct ci_char_traits : public std::char_traits<char>
+ #elif defined(__GNUC__)