1 files changed, 0 insertions, 282 deletions
diff --git a/test/intsubset2.xml b/test/intsubset2.xml
deleted file mode 100644
index 4ae845a..0000000
--- a/test/intsubset2.xml
+++ /dev/null
@@ -1,282 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE kanjidic2 [
-	<!-- Version 1.3
-	This is the DTD of the XML-format kanji file combining information from
-	the KANJIDIC and KANJD212 files. It is intended to be largely self-
-	documenting, with each field being accompanied by an explanatory
-	comment.
-
-	The file covers the following kanji:
-	(a) the 6,355 kanji from JIS X 0208;
-	(b) the 5,801 kanji from JIS X 0212;
-	(c) the 3,625 kanji from JIS X 0213 as follows:
-		(i) the 2,741 kanji which are also in JIS X 0212 have
-		JIS X 0213 code-points (kuten) added to the existing entry;
-		(ii) the 884 "new" kanji have new entries.
-
-	At the end of the explanation for a number of fields there is a tag
-	with the format [N]. This indicates the leading letter(s) of the
-	equivalent field in the KANJIDIC and KANJD212 files.
-
-	The KANJIDIC documentation should also be read for additional 
-	information about the information in the file.
-	-->
-<!ELEMENT kanjidic2 (header,character*)>
-<!ELEMENT header (file_version,database_version,date_of_creation)>
-<!--
-	The single header element will contain identification information
-	about the version of the file 
-	-->
-<!ELEMENT file_version (#PCDATA)>
-<!--
-	This field denotes the version of kanjidic2 structure, as more
-	than one version may exist.
-	-->
-<!ELEMENT database_version (#PCDATA)>
-<!--
-	The version of the file, in the format YYYY-NN, where NN will be
-	a number starting with 01 for the first version released in a
-	calendar year, then increasing for each version in that year.
-	-->
-<!ELEMENT date_of_creation (#PCDATA)>
-<!--
-	The date the file was created in international format (YYYY-MM-DD).
-	-->
-<!ELEMENT character (literal,codepoint, radical, misc, dic_number?, query_code?, reading_meaning?,nanori?)*>
-<!ELEMENT literal (#PCDATA)>
-<!--
-	The character itself in UTF8 coding.
-	-->
-<!ELEMENT codepoint (cp_value+)>
-	<!-- 
-	The codepoint element states the code of the character in the various
-	character set standards.
-	-->
-<!ELEMENT cp_value (#PCDATA)>
-	<!-- 
-	The cp_value contains the codepoint of the character in a particular
-	standard. The standard will be identified in the cp_type attribute.
-	-->
-<!ATTLIST cp_value cp_type CDATA #REQUIRED>
-	<!-- 
-	The cp_type attribute states the coding standard applying to the
-	element. The values assigned so far are:
-		jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
-		jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
-		jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
-		ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
-	-->
-<!ELEMENT radical (rad_value+)>
-<!ELEMENT rad_value (#PCDATA)>
-	<!-- 
-	The radical number, in the range 1 to 214. The particular
-	classification type is stated in the rad_type attribute.
-	-->
-<!ATTLIST rad_value rad_type CDATA #REQUIRED>
-	<!-- 
-	The rad_type attribute states the type of radical classification.
-		classical - as recorded in the KangXi Zidian.
-		nelson - as used in the Nelson "Modern Japanese-English 
-		Character Dictionary" (i.e. the Classic, not the New Nelson).
-		This will only be used where Nelson reclassified the kanji.
-	-->
-<!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
-<!ELEMENT grade (#PCDATA)>
-	<!-- 
-	The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
-	the kanji is taught in Japanese schools. 8 indicates it is one of the
-	remaining Jouyou Kanji to be learned in junior high school, and 9 
-	indicates it is a Jinmeiyou (for use in names) kanji. [G]
-	-->
-<!ELEMENT stroke_count (#PCDATA)>
-	<!-- 
-	The stroke count of the kanji, including the radical. If more than 
-	one, the first is considered the accepted count, while subsequent ones 
-	are common miscounts. (See Appendix E. of the KANJIDIC documentation
-	for some of the rules applied when counting strokes in some of the 
-	radicals.) [S]
-	-->
-<!ELEMENT variant (#PCDATA)>
-	<!-- 
-	A cross-reference code to another kanji, usually regarded as a variant.
-	The type of cross-reference is given in the var_type attribute.
-	-->
-<!ATTLIST variant var_type CDATA #REQUIRED>
-	<!-- 
-	The var_type attribute indicates the type of variant code. The current
-	values are: 
-		jis208 - in JIS X 0208 - kuten coding
-		jis212 - in JIS X 0212 - kuten coding
-		jis213 - in JIS X 0213 - kuten coding
-		deroo - De Roo number - numeric
-		njecd - Halpern NJECD index number - numeric
-		s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
-		nelson - "Classic" Nelson - numeric
-		oneill - Japanese Names (O'Neill) - numeric
-	-->
-<!ELEMENT freq (#PCDATA)>
-	<!-- 
-	A frequency-of-use ranking. The 2,500 most-used characters have a 
-	ranking; those characters that lack this field are not ranked. The 
-	frequency is a number from 1 to 2,500 that expresses the relative 
-	frequency of occurrence of a character in modern Japanese. This is
-	based on a survey in newspapers, so it is biassed towards kanji
-	used in newspaper articles. The discrimination between the less
-	frequently used kanji is not strong.
-	-->
-<!ELEMENT rad_name (#PCDATA)>
-	<!-- 
-	When the kanji is itself a radical and has a name, this element
-	contains the name (in hiragana.) [T2]
-	-->
-<!ELEMENT dic_number (dic_ref+)>
-	<!-- 
-	This element contains the index numbers and similar unstructured
-	information such as page numbers in a number of published dictionaries,
-	and instructional books on kanji.
-	-->
-<!ELEMENT dic_ref (#PCDATA)>
-	<!-- 
-	Each dic_ref contains an index number. The particular dictionary,
-	etc. is defined by the dr_type attribute.
-	-->
-<!ATTLIST dic_ref dr_type CDATA #REQUIRED>
-	<!-- 
-	The dr_type defines the dictionary or reference book, etc. to which
-	dic_ref element applies. The initial allocation is:
-	  nelson_c - "Modern Reader's Japanese-English Character Dictionary",  
-	  	edited by Andrew Nelson (now published as the "Classic" 
-	  	Nelson).
-	  nelson_n - "The New Nelson Japanese-English Character Dictionary", 
-	  	edited by John Haig.
-	  halpern_njecd - "New Japanese-English Character Dictionary", 
-	  	edited by Jack Halpern.
-	  halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by 
-	  	Jack Halpern.
-	  heisig - "Remembering The  Kanji"  by  James Heisig.
-	  gakken - "A  New Dictionary of Kanji Usage" (Gakken)
-	  oneill_names - "Japanese Names", by P.G. O'Neill. 
-	  oneill_kk - "Essential Kanji" by P.G. O'Neill.
-	  moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
-	  	additional attributes are used: m_vol:  the volume of the
-	  	dictionary in which the kanji is found, and m_page: the page
-	  	number in the volume.
-	  henshall - "A Guide To Remembering Japanese Characters" by
-	  	Kenneth G.  Henshall.
-	  sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
-	  sakade - "A Guide To Reading and Writing Japanese" edited by
-	  	Florence Sakade.
-	  henshall3 - "A Guide To Reading and Writing Japanese" 3rd
-		edition, edited by Henshall, Seeley and De Groot.
-	  tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
-	  crowley - "The Kanji Way to Japanese Language Power" by
-	  	Dale Crowley.
-	  kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
-	  busy_people - "Japanese For Busy People" vols I-III, published
-		by the AJLT. The codes are the volume.chapter.
-	  kodansha_compact - the "Kodansha Compact Kanji Guide".
-	-->
-<!ATTLIST dic_ref m_vol CDATA #IMPLIED>
-	<!-- 
-	See above under "moro".
-	-->
-<!ATTLIST dic_ref m_page CDATA #IMPLIED>
-	<!-- 
-	See above under "moro".
-	-->
-<!ELEMENT query_code (q_code+)>
-	<!-- 
-	These codes contain information relating to the glyph, and can be used
-	for finding a required kanji. The type of code is defined by the
-	qc_type attribute.
-	-->
-<!ELEMENT q_code (#PCDATA)>
-	<!--
-	The q_code contains the actual query-code value, according to the
-	qc_type attribute.
-	-->
-<!ATTLIST q_code qc_type CDATA #REQUIRED>
-	<!-- 
-	The q_code attribute defines the type of query code. The current values
-	are:
-	  skip -  Halpern's SKIP (System  of  Kanji  Indexing  by  Patterns) 
-	  	code. The  format is n-nn-nn.  See the KANJIDIC  documentation 
-	  	for  a description of the code and restrictions on  the 
-	  	commercial  use  of this data. [P]
-
-	  sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle 
-	  	1996) by Spahn and Hadamitzky. They are in the form nxnn.n,  
-	  	e.g.  3k11.2, where the  kanji has 3 strokes in the 
-	  	identifying radical, it is radical "k" in the SH 
-	  	classification system, there are 11 other strokes, and it is 
-	  	the 2nd kanji in the 3k11 sequence. (I am very grateful to 
-	  	Mark Spahn for providing the list of these descriptor codes 
-	  	for the kanji in this file.) [I]
-	  four_corner - the "Four Corner" code for the kanji. This is a code 
-	  	invented by Wang Chen in 1928. See the KANJIDIC documentation 
-	  	for  an overview of  the Four Corner System. [Q]
-
-	  deroo - the codes developed by the late Father Joseph De Roo, and 
-	  	published in  his book "2001 Kanji" (Bojinsha). Fr De Roo 
-	  	gave his permission for these codes to be included. [DR]
-	  misclass - a possible misclassification of the kanji according
-		to one of the code types. (See the "Z" codes in the KANJIDIC
-		documentation for more details.)
-	  
-	-->
-<!ELEMENT reading_meaning (rmgroup*, nanori*)>
-	<!-- 
-	The readings for the kanji in several languages, and the meanings, also
-	in several languages. The readings and meanings are grouped to enable
-	the handling of the situation where the meaning is differentiated by 
-	reading. [T1]
-	-->
-<!ELEMENT nanori (#PCDATA)>
-	<!-- 
-	Japanese readings that are now only associated with names.
-	-->
-<!ELEMENT rmgroup (reading*, meaning*)>
-<!ELEMENT reading (#PCDATA)>
-	<!-- 
-	The reading element contains the reading or pronunciation
-	of the kanji.
-	-->
-<!ATTLIST reading r_type CDATA #REQUIRED>
-	<!-- 
-	The r_type attribute defines the type of reading in the reading
-	element. The current values are:
-	  pinyin - the modern PinYin romanization of the Chinese reading 
-	  	of the kanji. The tones are represented by a concluding 
-	  	digit. [Y]
-	  korean_r - the romanized form of the Korean reading(s) of the 
-	  	kanji.  The readings are in the (Republic of Korea) Ministry 
-	  	of Education style of romanization. [W]
-	  korean_h - the Korean reading(s) of the kanji in hangul.
-	  ja_on - the "on" Japanese reading of the kanji, in katakana. A
-	  	second attribute r_status, if present, will indicate with
-	  	a value of "jy" whether the reading is approved for a
-	  	"Jouyou kanji".
-	  ja_kun - the "kun" Japanese reading of the kanji, in hiragana. 
-	  	Where relevant the okurigana is also included separated by a 
-	  	".". Readings associated with prefixes and suffixes are 
-	  	marked with a "-". A second attribute r_status, if present, 
-	  	will indicate with a value of "jy" whether the reading is 
-	  	approved for a "Jouyou kanji".
-	-->
-<!ATTLIST reading r_status CDATA #IMPLIED>
-	<!-- 
-	See under ja_on and ja_kun above.
-	-->
-<!ELEMENT meaning (#PCDATA)>
-	<!-- 
-	The meaning associated with the kanji.
-	-->
-<!ATTLIST meaning m_lang CDATA #IMPLIED>
-	<!-- 
-	The m_lang attribute defines the target language of the meaning. It 
-	will be coded using the two-letter language code from the ISO 639 
-	standard. When absent, the value "en" (i.e. English) is implied. [{}]
-	-->
-] >
-<kanjidic2>
-</kanjidic2>