diff options
Diffstat (limited to 'result')
30 files changed, 1899 insertions, 1 deletions
diff --git a/result/comment3.xml b/result/comment3.xml new file mode 100644 index 0000000..395f67c --- /dev/null +++ b/result/comment3.xml @@ -0,0 +1,164 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- test of very very long comments and buffer limits +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +--> +<doc/> diff --git a/result/comment3.xml.rde b/result/comment3.xml.rde new file mode 100644 index 0000000..519b8d0 --- /dev/null +++ b/result/comment3.xml.rde @@ -0,0 +1,163 @@ +0 8 #comment 0 1 test of very very long comments and buffer limits +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 + +0 1 doc 1 0 diff --git a/result/comment3.xml.rdr b/result/comment3.xml.rdr new file mode 100644 index 0000000..519b8d0 --- /dev/null +++ b/result/comment3.xml.rdr @@ -0,0 +1,163 @@ +0 8 #comment 0 1 test of very very long comments and buffer limits +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 + +0 1 doc 1 0 diff --git a/result/comment3.xml.sax b/result/comment3.xml.sax new file mode 100644 index 0000000..82b8b36 --- /dev/null +++ b/result/comment3.xml.sax @@ -0,0 +1,167 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( test of very very long comments and buffer limits +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +) +SAX.startElement(doc) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/comment4.xml b/result/comment4.xml new file mode 100644 index 0000000..93282d8 --- /dev/null +++ b/result/comment4.xml @@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- test of non ascii comments like là et très --> +<!--à another one --> +<!-- another one à--> +<doc/> diff --git a/result/comment4.xml.rde b/result/comment4.xml.rde new file mode 100644 index 0000000..09e181b --- /dev/null +++ b/result/comment4.xml.rde @@ -0,0 +1,4 @@ +0 8 #comment 0 1 test of non ascii comments like là et très +0 8 #comment 0 1 à another one +0 8 #comment 0 1 another one à +0 1 doc 1 0 diff --git a/result/comment4.xml.rdr b/result/comment4.xml.rdr new file mode 100644 index 0000000..09e181b --- /dev/null +++ b/result/comment4.xml.rdr @@ -0,0 +1,4 @@ +0 8 #comment 0 1 test of non ascii comments like là et très +0 8 #comment 0 1 à another one +0 8 #comment 0 1 another one à +0 1 doc 1 0 diff --git a/result/comment4.xml.sax b/result/comment4.xml.sax new file mode 100644 index 0000000..26011d9 --- /dev/null +++ b/result/comment4.xml.sax @@ -0,0 +1,8 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( test of non ascii comments like là et très ) +SAX.comment(à another one ) +SAX.comment( another one à ) +SAX.startElement(doc) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/comment5.xml b/result/comment5.xml new file mode 100644 index 0000000..398f974 --- /dev/null +++ b/result/comment5.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- test of hyphen and line break handling + some text - interrupted - +- - - - - - - - - - - - - - - - - - - - - - + this should stop here^ + + +--> +<doc/> diff --git a/result/comment5.xml.rde b/result/comment5.xml.rde new file mode 100644 index 0000000..fe7a88f --- /dev/null +++ b/result/comment5.xml.rde @@ -0,0 +1,8 @@ +0 8 #comment 0 1 test of hyphen and line break handling + some text - interrupted - +- - - - - - - - - - - - - - - - - - - - - - + this should stop here^ + + + +0 1 doc 1 0 diff --git a/result/comment5.xml.rdr b/result/comment5.xml.rdr new file mode 100644 index 0000000..fe7a88f --- /dev/null +++ b/result/comment5.xml.rdr @@ -0,0 +1,8 @@ +0 8 #comment 0 1 test of hyphen and line break handling + some text - interrupted - +- - - - - - - - - - - - - - - - - - - - - - + this should stop here^ + + + +0 1 doc 1 0 diff --git a/result/comment5.xml.sax b/result/comment5.xml.sax new file mode 100644 index 0000000..dd58e7a --- /dev/null +++ b/result/comment5.xml.sax @@ -0,0 +1,12 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( test of hyphen and line break handling + some text - interrupted - +- - - - - - - - - - - - - - - - - - - - - - + this should stop here^ + + +) +SAX.startElement(doc) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/errors/webdav.xml b/result/errors/webdav.xml new file mode 100644 index 0000000..2646219 --- /dev/null +++ b/result/errors/webdav.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="utf-8"?> +<propfind xmlns="DAV:"><prop> +<getcontentlength xmlns="DAV:"/> +<getlastmodified xmlns="DAV:"/> +<displayname xmlns="DAV:"/> +<executable xmlns="http://apache.org/dav/props/"/> +<resourcetype xmlns="DAV:"/> +</prop></propfind> diff --git a/result/errors/webdav.xml.err b/result/errors/webdav.xml.err new file mode 100644 index 0000000..a722454 --- /dev/null +++ b/result/errors/webdav.xml.err @@ -0,0 +1,15 @@ +./test/errors/webdav.xml:2: parser warning : xmlns: DAV: not a valid URI +<propfind xmlns="DAV:"><prop> + ^ +./test/errors/webdav.xml:3: parser warning : xmlns: DAV: not a valid URI +<getcontentlength xmlns="DAV:"/> + ^ +./test/errors/webdav.xml:4: parser warning : xmlns: DAV: not a valid URI +<getlastmodified xmlns="DAV:"/> + ^ +./test/errors/webdav.xml:5: parser warning : xmlns: DAV: not a valid URI +<displayname xmlns="DAV:"/> + ^ +./test/errors/webdav.xml:7: parser warning : xmlns: DAV: not a valid URI +<resourcetype xmlns="DAV:"/> + ^ diff --git a/result/errors/webdav.xml.str b/result/errors/webdav.xml.str new file mode 100644 index 0000000..a722454 --- /dev/null +++ b/result/errors/webdav.xml.str @@ -0,0 +1,15 @@ +./test/errors/webdav.xml:2: parser warning : xmlns: DAV: not a valid URI +<propfind xmlns="DAV:"><prop> + ^ +./test/errors/webdav.xml:3: parser warning : xmlns: DAV: not a valid URI +<getcontentlength xmlns="DAV:"/> + ^ +./test/errors/webdav.xml:4: parser warning : xmlns: DAV: not a valid URI +<getlastmodified xmlns="DAV:"/> + ^ +./test/errors/webdav.xml:5: parser warning : xmlns: DAV: not a valid URI +<displayname xmlns="DAV:"/> + ^ +./test/errors/webdav.xml:7: parser warning : xmlns: DAV: not a valid URI +<resourcetype xmlns="DAV:"/> + ^ diff --git a/result/intsubset2.xml b/result/intsubset2.xml new file mode 100644 index 0000000..b103955 --- /dev/null +++ b/result/intsubset2.xml @@ -0,0 +1,250 @@ +<?xml version="1.0"?> +<!DOCTYPE kanjidic2 [ +<!-- Version 1.3 + This is the DTD of the XML-format kanji file combining information from + the KANJIDIC and KANJD212 files. It is intended to be largely self- + documenting, with each field being accompanied by an explanatory + comment. + + The file covers the following kanji: + (a) the 6,355 kanji from JIS X 0208; + (b) the 5,801 kanji from JIS X 0212; + (c) the 3,625 kanji from JIS X 0213 as follows: + (i) the 2,741 kanji which are also in JIS X 0212 have + JIS X 0213 code-points (kuten) added to the existing entry; + (ii) the 884 "new" kanji have new entries. + + At the end of the explanation for a number of fields there is a tag + with the format [N]. This indicates the leading letter(s) of the + equivalent field in the KANJIDIC and KANJD212 files. + + The KANJIDIC documentation should also be read for additional + information about the information in the file. + --><!ELEMENT kanjidic2 (header , character*)> +<!ELEMENT header (file_version , database_version , date_of_creation)> +<!-- + The single header element will contain identification information + about the version of the file + --><!ELEMENT file_version (#PCDATA)> +<!-- + This field denotes the version of kanjidic2 structure, as more + than one version may exist. + --><!ELEMENT database_version (#PCDATA)> +<!-- + The version of the file, in the format YYYY-NN, where NN will be + a number starting with 01 for the first version released in a + calendar year, then increasing for each version in that year. + --><!ELEMENT date_of_creation (#PCDATA)> +<!-- + The date the file was created in international format (YYYY-MM-DD). + --><!ELEMENT character (literal , codepoint , radical , misc , dic_number? , query_code? , reading_meaning? , nanori?)*> +<!ELEMENT literal (#PCDATA)> +<!-- + The character itself in UTF8 coding. + --><!ELEMENT codepoint (cp_value)+> +<!-- + The codepoint element states the code of the character in the various + character set standards. + --><!ELEMENT cp_value (#PCDATA)> +<!-- + The cp_value contains the codepoint of the character in a particular + standard. The standard will be identified in the cp_type attribute. + --><!ATTLIST cp_value cp_type CDATA #REQUIRED> +<!-- + The cp_type attribute states the coding standard applying to the + element. The values assigned so far are: + jis208 - JIS X 0208-1997 - kuten coding (nn-nn) + jis212 - JIS X 0212-1990 - kuten coding (nn-nn) + jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn) + ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits) + --><!ELEMENT radical (rad_value)+> +<!ELEMENT rad_value (#PCDATA)> +<!-- + The radical number, in the range 1 to 214. The particular + classification type is stated in the rad_type attribute. + --><!ATTLIST rad_value rad_type CDATA #REQUIRED> +<!-- + The rad_type attribute states the type of radical classification. + classical - as recorded in the KangXi Zidian. + nelson - as used in the Nelson "Modern Japanese-English + Character Dictionary" (i.e. the Classic, not the New Nelson). + This will only be used where Nelson reclassified the kanji. + --><!ELEMENT misc (grade? , stroke_count+ , variant* , freq* , rad_name*)> +<!ELEMENT grade (#PCDATA)> +<!-- + The Jouyou Kanji grade level. 1 through 6 indicate the grade in which + the kanji is taught in Japanese schools. 8 indicates it is one of the + remaining Jouyou Kanji to be learned in junior high school, and 9 + indicates it is a Jinmeiyou (for use in names) kanji. [G] + --><!ELEMENT stroke_count (#PCDATA)> +<!-- + The stroke count of the kanji, including the radical. If more than + one, the first is considered the accepted count, while subsequent ones + are common miscounts. (See Appendix E. of the KANJIDIC documentation + for some of the rules applied when counting strokes in some of the + radicals.) [S] + --><!ELEMENT variant (#PCDATA)> +<!-- + A cross-reference code to another kanji, usually regarded as a variant. + The type of cross-reference is given in the var_type attribute. + --><!ATTLIST variant var_type CDATA #REQUIRED> +<!-- + The var_type attribute indicates the type of variant code. The current + values are: + jis208 - in JIS X 0208 - kuten coding + jis212 - in JIS X 0212 - kuten coding + jis213 - in JIS X 0213 - kuten coding + deroo - De Roo number - numeric + njecd - Halpern NJECD index number - numeric + s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor + nelson - "Classic" Nelson - numeric + oneill - Japanese Names (O'Neill) - numeric + --><!ELEMENT freq (#PCDATA)> +<!-- + A frequency-of-use ranking. The 2,500 most-used characters have a + ranking; those characters that lack this field are not ranked. The + frequency is a number from 1 to 2,500 that expresses the relative + frequency of occurrence of a character in modern Japanese. This is + based on a survey in newspapers, so it is biassed towards kanji + used in newspaper articles. The discrimination between the less + frequently used kanji is not strong. + --><!ELEMENT rad_name (#PCDATA)> +<!-- + When the kanji is itself a radical and has a name, this element + contains the name (in hiragana.) [T2] + --><!ELEMENT dic_number (dic_ref)+> +<!-- + This element contains the index numbers and similar unstructured + information such as page numbers in a number of published dictionaries, + and instructional books on kanji. + --><!ELEMENT dic_ref (#PCDATA)> +<!-- + Each dic_ref contains an index number. The particular dictionary, + etc. is defined by the dr_type attribute. + --><!ATTLIST dic_ref dr_type CDATA #REQUIRED> +<!-- + The dr_type defines the dictionary or reference book, etc. to which + dic_ref element applies. The initial allocation is: + nelson_c - "Modern Reader's Japanese-English Character Dictionary", + edited by Andrew Nelson (now published as the "Classic" + Nelson). + nelson_n - "The New Nelson Japanese-English Character Dictionary", + edited by John Haig. + halpern_njecd - "New Japanese-English Character Dictionary", + edited by Jack Halpern. + halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by + Jack Halpern. + heisig - "Remembering The Kanji" by James Heisig. + gakken - "A New Dictionary of Kanji Usage" (Gakken) + oneill_names - "Japanese Names", by P.G. O'Neill. + oneill_kk - "Essential Kanji" by P.G. O'Neill. + moro - "Daikanwajiten" compiled by Morohashi. For some kanji two + additional attributes are used: m_vol: the volume of the + dictionary in which the kanji is found, and m_page: the page + number in the volume. + henshall - "A Guide To Remembering Japanese Characters" by + Kenneth G. Henshall. + sh_kk - "Kanji and Kana" by Spahn and Hadamitzky. + sakade - "A Guide To Reading and Writing Japanese" edited by + Florence Sakade. + henshall3 - "A Guide To Reading and Writing Japanese" 3rd + edition, edited by Henshall, Seeley and De Groot. + tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask. + crowley - "The Kanji Way to Japanese Language Power" by + Dale Crowley. + kanji_in_context - "Kanji in Context" by Nishiguchi and Kono. + busy_people - "Japanese For Busy People" vols I-III, published + by the AJLT. The codes are the volume.chapter. + kodansha_compact - the "Kodansha Compact Kanji Guide". + --><!ATTLIST dic_ref m_vol CDATA #IMPLIED> +<!-- + See above under "moro". + --><!ATTLIST dic_ref m_page CDATA #IMPLIED> +<!-- + See above under "moro". + --><!ELEMENT query_code (q_code)+> +<!-- + These codes contain information relating to the glyph, and can be used + for finding a required kanji. The type of code is defined by the + qc_type attribute. + --><!ELEMENT q_code (#PCDATA)> +<!-- + The q_code contains the actual query-code value, according to the + qc_type attribute. + --><!ATTLIST q_code qc_type CDATA #REQUIRED> +<!-- + The q_code attribute defines the type of query code. The current values + are: + skip - Halpern's SKIP (System of Kanji Indexing by Patterns) + code. The format is n-nn-nn. See the KANJIDIC documentation + for a description of the code and restrictions on the + commercial use of this data. [P] + + sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle + 1996) by Spahn and Hadamitzky. They are in the form nxnn.n, + e.g. 3k11.2, where the kanji has 3 strokes in the + identifying radical, it is radical "k" in the SH + classification system, there are 11 other strokes, and it is + the 2nd kanji in the 3k11 sequence. (I am very grateful to + Mark Spahn for providing the list of these descriptor codes + for the kanji in this file.) [I] + four_corner - the "Four Corner" code for the kanji. This is a code + invented by Wang Chen in 1928. See the KANJIDIC documentation + for an overview of the Four Corner System. [Q] + + deroo - the codes developed by the late Father Joseph De Roo, and + published in his book "2001 Kanji" (Bojinsha). Fr De Roo + gave his permission for these codes to be included. [DR] + misclass - a possible misclassification of the kanji according + to one of the code types. (See the "Z" codes in the KANJIDIC + documentation for more details.) + + --><!ELEMENT reading_meaning (rmgroup* , nanori*)> +<!-- + The readings for the kanji in several languages, and the meanings, also + in several languages. The readings and meanings are grouped to enable + the handling of the situation where the meaning is differentiated by + reading. [T1] + --><!ELEMENT nanori (#PCDATA)> +<!-- + Japanese readings that are now only associated with names. + --><!ELEMENT rmgroup (reading* , meaning*)> +<!ELEMENT reading (#PCDATA)> +<!-- + The reading element contains the reading or pronunciation + of the kanji. + --><!ATTLIST reading r_type CDATA #REQUIRED> +<!-- + The r_type attribute defines the type of reading in the reading + element. The current values are: + pinyin - the modern PinYin romanization of the Chinese reading + of the kanji. The tones are represented by a concluding + digit. [Y] + korean_r - the romanized form of the Korean reading(s) of the + kanji. The readings are in the (Republic of Korea) Ministry + of Education style of romanization. [W] + korean_h - the Korean reading(s) of the kanji in hangul. + ja_on - the "on" Japanese reading of the kanji, in katakana. A + second attribute r_status, if present, will indicate with + a value of "jy" whether the reading is approved for a + "Jouyou kanji". + ja_kun - the "kun" Japanese reading of the kanji, in hiragana. + Where relevant the okurigana is also included separated by a + ".". Readings associated with prefixes and suffixes are + marked with a "-". A second attribute r_status, if present, + will indicate with a value of "jy" whether the reading is + approved for a "Jouyou kanji". + --><!ATTLIST reading r_status CDATA #IMPLIED> +<!-- + See under ja_on and ja_kun above. + --><!ELEMENT meaning (#PCDATA)> +<!-- + The meaning associated with the kanji. + --><!ATTLIST meaning m_lang CDATA #IMPLIED> +<!-- + The m_lang attribute defines the target language of the meaning. It + will be coded using the two-letter language code from the ISO 639 + standard. When absent, the value "en" (i.e. English) is implied. [{}] + -->]> +<kanjidic2> +</kanjidic2> diff --git a/result/intsubset2.xml.rde b/result/intsubset2.xml.rde new file mode 100644 index 0000000..d27e245 --- /dev/null +++ b/result/intsubset2.xml.rde @@ -0,0 +1,5 @@ +0 10 kanjidic2 0 0 +0 1 kanjidic2 0 0 +1 14 #text 0 1 + +0 15 kanjidic2 0 0 diff --git a/result/intsubset2.xml.rdr b/result/intsubset2.xml.rdr new file mode 100644 index 0000000..d27e245 --- /dev/null +++ b/result/intsubset2.xml.rdr @@ -0,0 +1,5 @@ +0 10 kanjidic2 0 0 +0 1 kanjidic2 0 0 +1 14 #text 0 1 + +0 15 kanjidic2 0 0 diff --git a/result/intsubset2.xml.sax b/result/intsubset2.xml.sax new file mode 100644 index 0000000..b4d7bf4 --- /dev/null +++ b/result/intsubset2.xml.sax @@ -0,0 +1,286 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(kanjidic2, , ) +SAX.comment( Version 1.3 + This is the DTD of the XML-format kanji file combining information from + the KANJIDIC and KANJD212 files. It is intended to be largely self- + documenting, with each field being accompanied by an explanatory + comment. + + The file covers the following kanji: + (a) the 6,355 kanji from JIS X 0208; + (b) the 5,801 kanji from JIS X 0212; + (c) the 3,625 kanji from JIS X 0213 as follows: + (i) the 2,741 kanji which are also in JIS X 0212 have + JIS X 0213 code-points (kuten) added to the existing entry; + (ii) the 884 "new" kanji have new entries. + + At the end of the explanation for a number of fields there is a tag + with the format [N]. This indicates the leading letter(s) of the + equivalent field in the KANJIDIC and KANJD212 files. + + The KANJIDIC documentation should also be read for additional + information about the information in the file. + ) +SAX.elementDecl(kanjidic2, 4, ...) +SAX.elementDecl(header, 4, ...) +SAX.comment( + The single header element will contain identification information + about the version of the file + ) +SAX.elementDecl(file_version, 3, ...) +SAX.comment( + This field denotes the version of kanjidic2 structure, as more + than one version may exist. + ) +SAX.elementDecl(database_version, 3, ...) +SAX.comment( + The version of the file, in the format YYYY-NN, where NN will be + a number starting with 01 for the first version released in a + calendar year, then increasing for each version in that year. + ) +SAX.elementDecl(date_of_creation, 3, ...) +SAX.comment( + The date the file was created in international format (YYYY-MM-DD). + ) +SAX.elementDecl(character, 4, ...) +SAX.elementDecl(literal, 3, ...) +SAX.comment( + The character itself in UTF8 coding. + ) +SAX.elementDecl(codepoint, 4, ...) +SAX.comment( + The codepoint element states the code of the character in the various + character set standards. + ) +SAX.elementDecl(cp_value, 3, ...) +SAX.comment( + The cp_value contains the codepoint of the character in a particular + standard. The standard will be identified in the cp_type attribute. + ) +SAX.attributeDecl(cp_value, cp_type, 1, 2, NULL, ...) +SAX.comment( + The cp_type attribute states the coding standard applying to the + element. The values assigned so far are: + jis208 - JIS X 0208-1997 - kuten coding (nn-nn) + jis212 - JIS X 0212-1990 - kuten coding (nn-nn) + jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn) + ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits) + ) +SAX.elementDecl(radical, 4, ...) +SAX.elementDecl(rad_value, 3, ...) +SAX.comment( + The radical number, in the range 1 to 214. The particular + classification type is stated in the rad_type attribute. + ) +SAX.attributeDecl(rad_value, rad_type, 1, 2, NULL, ...) +SAX.comment( + The rad_type attribute states the type of radical classification. + classical - as recorded in the KangXi Zidian. + nelson - as used in the Nelson "Modern Japanese-English + Character Dictionary" (i.e. the Classic, not the New Nelson). + This will only be used where Nelson reclassified the kanji. + ) +SAX.elementDecl(misc, 4, ...) +SAX.elementDecl(grade, 3, ...) +SAX.comment( + The Jouyou Kanji grade level. 1 through 6 indicate the grade in which + the kanji is taught in Japanese schools. 8 indicates it is one of the + remaining Jouyou Kanji to be learned in junior high school, and 9 + indicates it is a Jinmeiyou (for use in names) kanji. [G] + ) +SAX.elementDecl(stroke_count, 3, ...) +SAX.comment( + The stroke count of the kanji, including the radical. If more than + one, the first is considered the accepted count, while subsequent ones + are common miscounts. (See Appendix E. of the KANJIDIC documentation + for some of the rules applied when counting strokes in some of the + radicals.) [S] + ) +SAX.elementDecl(variant, 3, ...) +SAX.comment( + A cross-reference code to another kanji, usually regarded as a variant. + The type of cross-reference is given in the var_type attribute. + ) +SAX.attributeDecl(variant, var_type, 1, 2, NULL, ...) +SAX.comment( + The var_type attribute indicates the type of variant code. The current + values are: + jis208 - in JIS X 0208 - kuten coding + jis212 - in JIS X 0212 - kuten coding + jis213 - in JIS X 0213 - kuten coding + deroo - De Roo number - numeric + njecd - Halpern NJECD index number - numeric + s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor + nelson - "Classic" Nelson - numeric + oneill - Japanese Names (O'Neill) - numeric + ) +SAX.elementDecl(freq, 3, ...) +SAX.comment( + A frequency-of-use ranking. The 2,500 most-used characters have a + ranking; those characters that lack this field are not ranked. The + frequency is a number from 1 to 2,500 that expresses the relative + frequency of occurrence of a character in modern Japanese. This is + based on a survey in newspapers, so it is biassed towards kanji + used in newspaper articles. The discrimination between the less + frequently used kanji is not strong. + ) +SAX.elementDecl(rad_name, 3, ...) +SAX.comment( + When the kanji is itself a radical and has a name, this element + contains the name (in hiragana.) [T2] + ) +SAX.elementDecl(dic_number, 4, ...) +SAX.comment( + This element contains the index numbers and similar unstructured + information such as page numbers in a number of published dictionaries, + and instructional books on kanji. + ) +SAX.elementDecl(dic_ref, 3, ...) +SAX.comment( + Each dic_ref contains an index number. The particular dictionary, + etc. is defined by the dr_type attribute. + ) +SAX.attributeDecl(dic_ref, dr_type, 1, 2, NULL, ...) +SAX.comment( + The dr_type defines the dictionary or reference book, etc. to which + dic_ref element applies. The initial allocation is: + nelson_c - "Modern Reader's Japanese-English Character Dictionary", + edited by Andrew Nelson (now published as the "Classic" + Nelson). + nelson_n - "The New Nelson Japanese-English Character Dictionary", + edited by John Haig. + halpern_njecd - "New Japanese-English Character Dictionary", + edited by Jack Halpern. + halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by + Jack Halpern. + heisig - "Remembering The Kanji" by James Heisig. + gakken - "A New Dictionary of Kanji Usage" (Gakken) + oneill_names - "Japanese Names", by P.G. O'Neill. + oneill_kk - "Essential Kanji" by P.G. O'Neill. + moro - "Daikanwajiten" compiled by Morohashi. For some kanji two + additional attributes are used: m_vol: the volume of the + dictionary in which the kanji is found, and m_page: the page + number in the volume. + henshall - "A Guide To Remembering Japanese Characters" by + Kenneth G. Henshall. + sh_kk - "Kanji and Kana" by Spahn and Hadamitzky. + sakade - "A Guide To Reading and Writing Japanese" edited by + Florence Sakade. + henshall3 - "A Guide To Reading and Writing Japanese" 3rd + edition, edited by Henshall, Seeley and De Groot. + tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask. + crowley - "The Kanji Way to Japanese Language Power" by + Dale Crowley. + kanji_in_context - "Kanji in Context" by Nishiguchi and Kono. + busy_people - "Japanese For Busy People" vols I-III, published + by the AJLT. The codes are the volume.chapter. + kodansha_compact - the "Kodansha Compact Kanji Guide". + ) +SAX.attributeDecl(dic_ref, m_vol, 1, 3, NULL, ...) +SAX.comment( + See above under "moro". + ) +SAX.attributeDecl(dic_ref, m_page, 1, 3, NULL, ...) +SAX.comment( + See above under "moro". + ) +SAX.elementDecl(query_code, 4, ...) +SAX.comment( + These codes contain information relating to the glyph, and can be used + for finding a required kanji. The type of code is defined by the + qc_type attribute. + ) +SAX.elementDecl(q_code, 3, ...) +SAX.comment( + The q_code contains the actual query-code value, according to the + qc_type attribute. + ) +SAX.attributeDecl(q_code, qc_type, 1, 2, NULL, ...) +SAX.comment( + The q_code attribute defines the type of query code. The current values + are: + skip - Halpern's SKIP (System of Kanji Indexing by Patterns) + code. The format is n-nn-nn. See the KANJIDIC documentation + for a description of the code and restrictions on the + commercial use of this data. [P] + + sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle + 1996) by Spahn and Hadamitzky. They are in the form nxnn.n, + e.g. 3k11.2, where the kanji has 3 strokes in the + identifying radical, it is radical "k" in the SH + classification system, there are 11 other strokes, and it is + the 2nd kanji in the 3k11 sequence. (I am very grateful to + Mark Spahn for providing the list of these descriptor codes + for the kanji in this file.) [I] + four_corner - the "Four Corner" code for the kanji. This is a code + invented by Wang Chen in 1928. See the KANJIDIC documentation + for an overview of the Four Corner System. [Q] + + deroo - the codes developed by the late Father Joseph De Roo, and + published in his book "2001 Kanji" (Bojinsha). Fr De Roo + gave his permission for these codes to be included. [DR] + misclass - a possible misclassification of the kanji according + to one of the code types. (See the "Z" codes in the KANJIDIC + documentation for more details.) + + ) +SAX.elementDecl(reading_meaning, 4, ...) +SAX.comment( + The readings for the kanji in several languages, and the meanings, also + in several languages. The readings and meanings are grouped to enable + the handling of the situation where the meaning is differentiated by + reading. [T1] + ) +SAX.elementDecl(nanori, 3, ...) +SAX.comment( + Japanese readings that are now only associated with names. + ) +SAX.elementDecl(rmgroup, 4, ...) +SAX.elementDecl(reading, 3, ...) +SAX.comment( + The reading element contains the reading or pronunciation + of the kanji. + ) +SAX.attributeDecl(reading, r_type, 1, 2, NULL, ...) +SAX.comment( + The r_type attribute defines the type of reading in the reading + element. The current values are: + pinyin - the modern PinYin romanization of the Chinese reading + of the kanji. The tones are represented by a concluding + digit. [Y] + korean_r - the romanized form of the Korean reading(s) of the + kanji. The readings are in the (Republic of Korea) Ministry + of Education style of romanization. [W] + korean_h - the Korean reading(s) of the kanji in hangul. + ja_on - the "on" Japanese reading of the kanji, in katakana. A + second attribute r_status, if present, will indicate with + a value of "jy" whether the reading is approved for a + "Jouyou kanji". + ja_kun - the "kun" Japanese reading of the kanji, in hiragana. + Where relevant the okurigana is also included separated by a + ".". Readings associated with prefixes and suffixes are + marked with a "-". A second attribute r_status, if present, + will indicate with a value of "jy" whether the reading is + approved for a "Jouyou kanji". + ) +SAX.attributeDecl(reading, r_status, 1, 3, NULL, ...) +SAX.comment( + See under ja_on and ja_kun above. + ) +SAX.elementDecl(meaning, 3, ...) +SAX.comment( + The meaning associated with the kanji. + ) +SAX.attributeDecl(meaning, m_lang, 1, 3, NULL, ...) +SAX.comment( + The m_lang attribute defines the target language of the meaning. It + will be coded using the two-letter language code from the ISO 639 + standard. When absent, the value "en" (i.e. English) is implied. [{}] + ) +SAX.externalSubset(kanjidic2, , ) +SAX.startElement(kanjidic2) +SAX.characters( +, 1) +SAX.endElement(kanjidic2) +SAX.endDocument() diff --git a/result/noent/comment3.xml b/result/noent/comment3.xml new file mode 100644 index 0000000..395f67c --- /dev/null +++ b/result/noent/comment3.xml @@ -0,0 +1,164 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- test of very very long comments and buffer limits +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +01234567890123456789012345678901234567890123456789 +--> +<doc/> diff --git a/result/noent/comment4.xml b/result/noent/comment4.xml new file mode 100644 index 0000000..93282d8 --- /dev/null +++ b/result/noent/comment4.xml @@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- test of non ascii comments like là et très --> +<!--à another one --> +<!-- another one à--> +<doc/> diff --git a/result/noent/comment5.xml b/result/noent/comment5.xml new file mode 100644 index 0000000..398f974 --- /dev/null +++ b/result/noent/comment5.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- test of hyphen and line break handling + some text - interrupted - +- - - - - - - - - - - - - - - - - - - - - - + this should stop here^ + + +--> +<doc/> diff --git a/result/noent/intsubset2.xml b/result/noent/intsubset2.xml new file mode 100644 index 0000000..b103955 --- /dev/null +++ b/result/noent/intsubset2.xml @@ -0,0 +1,250 @@ +<?xml version="1.0"?> +<!DOCTYPE kanjidic2 [ +<!-- Version 1.3 + This is the DTD of the XML-format kanji file combining information from + the KANJIDIC and KANJD212 files. It is intended to be largely self- + documenting, with each field being accompanied by an explanatory + comment. + + The file covers the following kanji: + (a) the 6,355 kanji from JIS X 0208; + (b) the 5,801 kanji from JIS X 0212; + (c) the 3,625 kanji from JIS X 0213 as follows: + (i) the 2,741 kanji which are also in JIS X 0212 have + JIS X 0213 code-points (kuten) added to the existing entry; + (ii) the 884 "new" kanji have new entries. + + At the end of the explanation for a number of fields there is a tag + with the format [N]. This indicates the leading letter(s) of the + equivalent field in the KANJIDIC and KANJD212 files. + + The KANJIDIC documentation should also be read for additional + information about the information in the file. + --><!ELEMENT kanjidic2 (header , character*)> +<!ELEMENT header (file_version , database_version , date_of_creation)> +<!-- + The single header element will contain identification information + about the version of the file + --><!ELEMENT file_version (#PCDATA)> +<!-- + This field denotes the version of kanjidic2 structure, as more + than one version may exist. + --><!ELEMENT database_version (#PCDATA)> +<!-- + The version of the file, in the format YYYY-NN, where NN will be + a number starting with 01 for the first version released in a + calendar year, then increasing for each version in that year. + --><!ELEMENT date_of_creation (#PCDATA)> +<!-- + The date the file was created in international format (YYYY-MM-DD). + --><!ELEMENT character (literal , codepoint , radical , misc , dic_number? , query_code? , reading_meaning? , nanori?)*> +<!ELEMENT literal (#PCDATA)> +<!-- + The character itself in UTF8 coding. + --><!ELEMENT codepoint (cp_value)+> +<!-- + The codepoint element states the code of the character in the various + character set standards. + --><!ELEMENT cp_value (#PCDATA)> +<!-- + The cp_value contains the codepoint of the character in a particular + standard. The standard will be identified in the cp_type attribute. + --><!ATTLIST cp_value cp_type CDATA #REQUIRED> +<!-- + The cp_type attribute states the coding standard applying to the + element. The values assigned so far are: + jis208 - JIS X 0208-1997 - kuten coding (nn-nn) + jis212 - JIS X 0212-1990 - kuten coding (nn-nn) + jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn) + ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits) + --><!ELEMENT radical (rad_value)+> +<!ELEMENT rad_value (#PCDATA)> +<!-- + The radical number, in the range 1 to 214. The particular + classification type is stated in the rad_type attribute. + --><!ATTLIST rad_value rad_type CDATA #REQUIRED> +<!-- + The rad_type attribute states the type of radical classification. + classical - as recorded in the KangXi Zidian. + nelson - as used in the Nelson "Modern Japanese-English + Character Dictionary" (i.e. the Classic, not the New Nelson). + This will only be used where Nelson reclassified the kanji. + --><!ELEMENT misc (grade? , stroke_count+ , variant* , freq* , rad_name*)> +<!ELEMENT grade (#PCDATA)> +<!-- + The Jouyou Kanji grade level. 1 through 6 indicate the grade in which + the kanji is taught in Japanese schools. 8 indicates it is one of the + remaining Jouyou Kanji to be learned in junior high school, and 9 + indicates it is a Jinmeiyou (for use in names) kanji. [G] + --><!ELEMENT stroke_count (#PCDATA)> +<!-- + The stroke count of the kanji, including the radical. If more than + one, the first is considered the accepted count, while subsequent ones + are common miscounts. (See Appendix E. of the KANJIDIC documentation + for some of the rules applied when counting strokes in some of the + radicals.) [S] + --><!ELEMENT variant (#PCDATA)> +<!-- + A cross-reference code to another kanji, usually regarded as a variant. + The type of cross-reference is given in the var_type attribute. + --><!ATTLIST variant var_type CDATA #REQUIRED> +<!-- + The var_type attribute indicates the type of variant code. The current + values are: + jis208 - in JIS X 0208 - kuten coding + jis212 - in JIS X 0212 - kuten coding + jis213 - in JIS X 0213 - kuten coding + deroo - De Roo number - numeric + njecd - Halpern NJECD index number - numeric + s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor + nelson - "Classic" Nelson - numeric + oneill - Japanese Names (O'Neill) - numeric + --><!ELEMENT freq (#PCDATA)> +<!-- + A frequency-of-use ranking. The 2,500 most-used characters have a + ranking; those characters that lack this field are not ranked. The + frequency is a number from 1 to 2,500 that expresses the relative + frequency of occurrence of a character in modern Japanese. This is + based on a survey in newspapers, so it is biassed towards kanji + used in newspaper articles. The discrimination between the less + frequently used kanji is not strong. + --><!ELEMENT rad_name (#PCDATA)> +<!-- + When the kanji is itself a radical and has a name, this element + contains the name (in hiragana.) [T2] + --><!ELEMENT dic_number (dic_ref)+> +<!-- + This element contains the index numbers and similar unstructured + information such as page numbers in a number of published dictionaries, + and instructional books on kanji. + --><!ELEMENT dic_ref (#PCDATA)> +<!-- + Each dic_ref contains an index number. The particular dictionary, + etc. is defined by the dr_type attribute. + --><!ATTLIST dic_ref dr_type CDATA #REQUIRED> +<!-- + The dr_type defines the dictionary or reference book, etc. to which + dic_ref element applies. The initial allocation is: + nelson_c - "Modern Reader's Japanese-English Character Dictionary", + edited by Andrew Nelson (now published as the "Classic" + Nelson). + nelson_n - "The New Nelson Japanese-English Character Dictionary", + edited by John Haig. + halpern_njecd - "New Japanese-English Character Dictionary", + edited by Jack Halpern. + halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by + Jack Halpern. + heisig - "Remembering The Kanji" by James Heisig. + gakken - "A New Dictionary of Kanji Usage" (Gakken) + oneill_names - "Japanese Names", by P.G. O'Neill. + oneill_kk - "Essential Kanji" by P.G. O'Neill. + moro - "Daikanwajiten" compiled by Morohashi. For some kanji two + additional attributes are used: m_vol: the volume of the + dictionary in which the kanji is found, and m_page: the page + number in the volume. + henshall - "A Guide To Remembering Japanese Characters" by + Kenneth G. Henshall. + sh_kk - "Kanji and Kana" by Spahn and Hadamitzky. + sakade - "A Guide To Reading and Writing Japanese" edited by + Florence Sakade. + henshall3 - "A Guide To Reading and Writing Japanese" 3rd + edition, edited by Henshall, Seeley and De Groot. + tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask. + crowley - "The Kanji Way to Japanese Language Power" by + Dale Crowley. + kanji_in_context - "Kanji in Context" by Nishiguchi and Kono. + busy_people - "Japanese For Busy People" vols I-III, published + by the AJLT. The codes are the volume.chapter. + kodansha_compact - the "Kodansha Compact Kanji Guide". + --><!ATTLIST dic_ref m_vol CDATA #IMPLIED> +<!-- + See above under "moro". + --><!ATTLIST dic_ref m_page CDATA #IMPLIED> +<!-- + See above under "moro". + --><!ELEMENT query_code (q_code)+> +<!-- + These codes contain information relating to the glyph, and can be used + for finding a required kanji. The type of code is defined by the + qc_type attribute. + --><!ELEMENT q_code (#PCDATA)> +<!-- + The q_code contains the actual query-code value, according to the + qc_type attribute. + --><!ATTLIST q_code qc_type CDATA #REQUIRED> +<!-- + The q_code attribute defines the type of query code. The current values + are: + skip - Halpern's SKIP (System of Kanji Indexing by Patterns) + code. The format is n-nn-nn. See the KANJIDIC documentation + for a description of the code and restrictions on the + commercial use of this data. [P] + + sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle + 1996) by Spahn and Hadamitzky. They are in the form nxnn.n, + e.g. 3k11.2, where the kanji has 3 strokes in the + identifying radical, it is radical "k" in the SH + classification system, there are 11 other strokes, and it is + the 2nd kanji in the 3k11 sequence. (I am very grateful to + Mark Spahn for providing the list of these descriptor codes + for the kanji in this file.) [I] + four_corner - the "Four Corner" code for the kanji. This is a code + invented by Wang Chen in 1928. See the KANJIDIC documentation + for an overview of the Four Corner System. [Q] + + deroo - the codes developed by the late Father Joseph De Roo, and + published in his book "2001 Kanji" (Bojinsha). Fr De Roo + gave his permission for these codes to be included. [DR] + misclass - a possible misclassification of the kanji according + to one of the code types. (See the "Z" codes in the KANJIDIC + documentation for more details.) + + --><!ELEMENT reading_meaning (rmgroup* , nanori*)> +<!-- + The readings for the kanji in several languages, and the meanings, also + in several languages. The readings and meanings are grouped to enable + the handling of the situation where the meaning is differentiated by + reading. [T1] + --><!ELEMENT nanori (#PCDATA)> +<!-- + Japanese readings that are now only associated with names. + --><!ELEMENT rmgroup (reading* , meaning*)> +<!ELEMENT reading (#PCDATA)> +<!-- + The reading element contains the reading or pronunciation + of the kanji. + --><!ATTLIST reading r_type CDATA #REQUIRED> +<!-- + The r_type attribute defines the type of reading in the reading + element. The current values are: + pinyin - the modern PinYin romanization of the Chinese reading + of the kanji. The tones are represented by a concluding + digit. [Y] + korean_r - the romanized form of the Korean reading(s) of the + kanji. The readings are in the (Republic of Korea) Ministry + of Education style of romanization. [W] + korean_h - the Korean reading(s) of the kanji in hangul. + ja_on - the "on" Japanese reading of the kanji, in katakana. A + second attribute r_status, if present, will indicate with + a value of "jy" whether the reading is approved for a + "Jouyou kanji". + ja_kun - the "kun" Japanese reading of the kanji, in hiragana. + Where relevant the okurigana is also included separated by a + ".". Readings associated with prefixes and suffixes are + marked with a "-". A second attribute r_status, if present, + will indicate with a value of "jy" whether the reading is + approved for a "Jouyou kanji". + --><!ATTLIST reading r_status CDATA #IMPLIED> +<!-- + See under ja_on and ja_kun above. + --><!ELEMENT meaning (#PCDATA)> +<!-- + The meaning associated with the kanji. + --><!ATTLIST meaning m_lang CDATA #IMPLIED> +<!-- + The m_lang attribute defines the target language of the meaning. It + will be coded using the two-letter language code from the ISO 639 + standard. When absent, the value "en" (i.e. English) is implied. [{}] + -->]> +<kanjidic2> +</kanjidic2> diff --git a/result/pattern/conj b/result/pattern/conj new file mode 100644 index 0000000..616450b --- /dev/null +++ b/result/pattern/conj @@ -0,0 +1,47 @@ +Node /a matches pattern a|b +Node /a/b matches pattern a|b +Node /a/b/c[1]/b matches pattern a|b +Node /a/b/c[2]/b matches pattern a|b +Node /a/b/c[2]/c/b matches pattern a|b +Node /a/c/b matches pattern a|b +Node /a matches pattern a|c +Node /a/b/c[1] matches pattern a|c +Node /a/b/c[2] matches pattern a|c +Node /a/b/c[2]/c matches pattern a|c +Node /a/c matches pattern a|c +Node /a/b matches pattern b|c +Node /a/b/c[1] matches pattern b|c +Node /a/b/c[1]/b matches pattern b|c +Node /a/b/c[2] matches pattern b|c +Node /a/b/c[2]/b matches pattern b|c +Node /a/b/c[2]/c matches pattern b|c +Node /a/b/c[2]/c/b matches pattern b|c +Node /a/c matches pattern b|c +Node /a/c/b matches pattern b|c +Node /a matches pattern a|b|c +Node /a/b matches pattern a|b|c +Node /a/b/c[1] matches pattern a|b|c +Node /a/b/c[1]/b matches pattern a|b|c +Node /a/b/c[2] matches pattern a|b|c +Node /a/b/c[2]/b matches pattern a|b|c +Node /a/b/c[2]/c matches pattern a|b|c +Node /a/b/c[2]/c/b matches pattern a|b|c +Node /a/c matches pattern a|b|c +Node /a/c/b matches pattern a|b|c +Node /a matches pattern /a|b +Node /a/b matches pattern /a|b +Node /a/b/c[1]/b matches pattern /a|b +Node /a/b/c[2]/b matches pattern /a|b +Node /a/b/c[2]/c/b matches pattern /a|b +Node /a/c/b matches pattern /a|b +Node /a matches pattern b|/a +Node /a/b matches pattern b|/a +Node /a/b/c[1]/b matches pattern b|/a +Node /a/b/c[2]/b matches pattern b|/a +Node /a/b/c[2]/c/b matches pattern b|/a +Node /a/c/b matches pattern b|/a +Node /a/b/c[1] matches pattern a//c|b//c +Node /a/b/c[2] matches pattern a//c|b//c +Node /a/b/c[2]/c matches pattern a//c|b//c +Node /a/c matches pattern a//c|b//c +Node /a matches pattern d|e|f|g|h|a diff --git a/result/pattern/multiple b/result/pattern/multiple new file mode 100644 index 0000000..e10390e --- /dev/null +++ b/result/pattern/multiple @@ -0,0 +1,91 @@ +Node /c/b[1]/a[1] matches pattern a +Node /c/b[1]/a[2] matches pattern a +Node /c/c/b/a[1] matches pattern a +Node /c/c/b/a[2] matches pattern a +Node /c/b[2]/a[1] matches pattern a +Node /c/b[2]/a[2] matches pattern a +Node /c/b[1] matches pattern b +Node /c/c/b matches pattern b +Node /c/b[2] matches pattern b +Node /c matches pattern c +Node /c/c matches pattern c +Node /c/b[1] matches pattern c/b +Node /c/c/b matches pattern c/b +Node /c/b[2] matches pattern c/b +Node /c/b[1]/a[1] matches pattern b/a +Node /c/b[1]/a[2] matches pattern b/a +Node /c/c/b/a[1] matches pattern b/a +Node /c/c/b/a[2] matches pattern b/a +Node /c/b[2]/a[1] matches pattern b/a +Node /c/b[2]/a[2] matches pattern b/a +Node /c/b[1]/a[1] matches pattern c/b/a +Node /c/b[1]/a[2] matches pattern c/b/a +Node /c/c/b/a[1] matches pattern c/b/a +Node /c/c/b/a[2] matches pattern c/b/a +Node /c/b[2]/a[1] matches pattern c/b/a +Node /c/b[2]/a[2] matches pattern c/b/a +Node /c/b[1]/a[1] matches pattern c//a +Node /c/b[1]/a[2] matches pattern c//a +Node /c/c/b/a[1] matches pattern c//a +Node /c/c/b/a[2] matches pattern c//a +Node /c/b[2]/a[1] matches pattern c//a +Node /c/b[2]/a[2] matches pattern c//a +Node /c/b[1] matches pattern c//b +Node /c/c/b matches pattern c//b +Node /c/b[2] matches pattern c//b +Node /c/b[1]/a[1] matches pattern b//a +Node /c/b[1]/a[2] matches pattern b//a +Node /c/c/b/a[1] matches pattern b//a +Node /c/c/b/a[2] matches pattern b//a +Node /c/b[2]/a[1] matches pattern b//a +Node /c/b[2]/a[2] matches pattern b//a +Node /c/b[1]/a[1] matches pattern c//b//a +Node /c/b[1]/a[2] matches pattern c//b//a +Node /c/c/b/a[1] matches pattern c//b//a +Node /c/c/b/a[2] matches pattern c//b//a +Node /c/b[2]/a[1] matches pattern c//b//a +Node /c/b[2]/a[2] matches pattern c//b//a +Node /c/b[1]/a[1] matches pattern c/b//a +Node /c/b[1]/a[2] matches pattern c/b//a +Node /c/c/b/a[1] matches pattern c/b//a +Node /c/c/b/a[2] matches pattern c/b//a +Node /c/b[2]/a[1] matches pattern c/b//a +Node /c/b[2]/a[2] matches pattern c/b//a +Node /c/b[1]/a[1] matches pattern c//b/a +Node /c/b[1]/a[2] matches pattern c//b/a +Node /c/c/b/a[1] matches pattern c//b/a +Node /c/c/b/a[2] matches pattern c//b/a +Node /c/b[2]/a[1] matches pattern c//b/a +Node /c/b[2]/a[2] matches pattern c//b/a +Node /c matches pattern /c +Node /c/b[1] matches pattern /c/b +Node /c/b[2] matches pattern /c/b +Node /c/b[1]/a[1] matches pattern /c/b/a +Node /c/b[1]/a[2] matches pattern /c/b/a +Node /c/b[2]/a[1] matches pattern /c/b/a +Node /c/b[2]/a[2] matches pattern /c/b/a +Node /c/b[1]/a[1] matches pattern /c//a +Node /c/b[1]/a[2] matches pattern /c//a +Node /c/c/b/a[1] matches pattern /c//a +Node /c/c/b/a[2] matches pattern /c//a +Node /c/b[2]/a[1] matches pattern /c//a +Node /c/b[2]/a[2] matches pattern /c//a +Node /c/b[1] matches pattern /c//b +Node /c/c/b matches pattern /c//b +Node /c/b[2] matches pattern /c//b +Node /c/b[1]/a[1] matches pattern /c//b//a +Node /c/b[1]/a[2] matches pattern /c//b//a +Node /c/c/b/a[1] matches pattern /c//b//a +Node /c/c/b/a[2] matches pattern /c//b//a +Node /c/b[2]/a[1] matches pattern /c//b//a +Node /c/b[2]/a[2] matches pattern /c//b//a +Node /c/b[1]/a[1] matches pattern /c/b//a +Node /c/b[1]/a[2] matches pattern /c/b//a +Node /c/b[2]/a[1] matches pattern /c/b//a +Node /c/b[2]/a[2] matches pattern /c/b//a +Node /c/b[1]/a[1] matches pattern /c//b/a +Node /c/b[1]/a[2] matches pattern /c//b/a +Node /c/c/b/a[1] matches pattern /c//b/a +Node /c/c/b/a[2] matches pattern /c//b/a +Node /c/b[2]/a[1] matches pattern /c//b/a +Node /c/b[2]/a[2] matches pattern /c//b/a diff --git a/result/pattern/namespaces b/result/pattern/namespaces new file mode 100644 index 0000000..f8b3495 --- /dev/null +++ b/result/pattern/namespaces @@ -0,0 +1,20 @@ +Node /a matches pattern //a +Node /a/b:b/a matches pattern //a +Node /a/a/a matches pattern //a +Node /a/a:a matches pattern //a:a +Node /a/b:b/a/a:a matches pattern //a:a +Node /a/a matches pattern //a:a +Node /a/c:a matches pattern //a:a +Node /a/b matches pattern //b +Node /a/a:a/b:b matches pattern //b:b +Node /a/b:b matches pattern //b:b +Node /a/b:b/a/a:a/b:b matches pattern //b:b +Node /a/b:b/a matches pattern /a//a +Node /a/a/a matches pattern /a//a +Node /a/b matches pattern /a/b +Node /a/a:a matches pattern /a/a:a +Node /a/a matches pattern /a/a:a +Node /a/c:a matches pattern /a/a:a +Node /a/a:a matches pattern /a/c:a +Node /a/a matches pattern /a/c:a +Node /a/c:a matches pattern /a/c:a diff --git a/result/pattern/simple b/result/pattern/simple new file mode 100644 index 0000000..5710b5c --- /dev/null +++ b/result/pattern/simple @@ -0,0 +1,12 @@ +Node /a matches pattern a +Node /a/b matches pattern b +Node /a/b matches pattern b +Node /a matches pattern /a +Node /a/b matches pattern a/b +Node /a/b/c matches pattern a/b/c +Node /a matches pattern //a +Node /a/b matches pattern //b +Node /a/b/c matches pattern //c +Node /a/b matches pattern a//b +Node /a/b/c matches pattern a//c +Node /a/b/c matches pattern b//c diff --git a/result/schemas/anyAttr-processContents-err1_0_0.err b/result/schemas/anyAttr-processContents-err1_0_0.err index 84b3c98..160863d 100644 --- a/result/schemas/anyAttr-processContents-err1_0_0.err +++ b/result/schemas/anyAttr-processContents-err1_0_0.err @@ -1,2 +1,2 @@ ./test/schemas/anyAttr-processContents-err1_0.xml:11: element elem.lax: Schemas validity error : Element 'elem.lax', attribute 'foo:bar' ['language']: The value 'o o' is not valid. -./test/schemas/anyAttr-processContents-err1_0.xml:12: element elem.strict: Schemas validity error : Element 'elem.strict': The attribute 'foo:barB' is not allowed. +./test/schemas/anyAttr-processContents-err1_0.xml:12: element elem.strict: Schemas validity error : Element 'elem.strict', attribute 'foo:barB' [strict wildcard]: No global attribute declaration found, but stipulated by the strict processContents of the wildcard. diff --git a/result/schemas/bug167754_0_0 b/result/schemas/bug167754_0_0 new file mode 100644 index 0000000..27b50fc --- /dev/null +++ b/result/schemas/bug167754_0_0 @@ -0,0 +1 @@ +./test/schemas/bug167754_0.xml validates diff --git a/result/schemas/bug167754_0_0.err b/result/schemas/bug167754_0_0.err new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/result/schemas/bug167754_0_0.err |