summaryrefslogtreecommitdiff
path: root/doc/xmlio.html
blob: ae71ba1ae25f652c182e6d05132f6cea0d89637c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /><link rel="SHORTCUT ICON" href="/favicon.ico" /><style type="text/css">
TD {font-family: Verdana,Arial,Helvetica}
BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em}
H1 {font-family: Verdana,Arial,Helvetica}
H2 {font-family: Verdana,Arial,Helvetica}
H3 {font-family: Verdana,Arial,Helvetica}
A:link, A:visited, A:active { text-decoration: underline }
</style><title>I/O Interfaces</title></head><body bgcolor="#8b7765" text="#000000" link="#a06060" vlink="#000000"><table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr><td width="120"><a href="http://swpat.ffii.org/"><img src="epatents.png" alt="Action against software patents" /></a></td><td width="180"><a href="http://www.gnome.org/"><img src="gnome2.png" alt="Gnome2 Logo" /></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo" /></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo" /></a><div align="left"><a href="http://xmlsoft.org/"><img src="Libxml2-Logo-180x168.gif" alt="Made with Libxml2 Logo" /></a></div></td><td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"><h1>The XML C parser and toolkit of Gnome</h1><h2>I/O Interfaces</h2></td></tr></table></td></tr></table></td></tr></table><table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr><td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Developer Menu</b></center></td></tr><tr><td bgcolor="#fffacd"><form action="search.php" enctype="application/x-www-form-urlencoded" method="get"><input name="query" type="text" size="20" value="" /><input name="submit" type="submit" value="Search ..." /></form><ul><li><a href="index.html" style="font-weight:bold">Main Menu</a></li><li><a href="html/index.html" style="font-weight:bold">Reference Manual</a></li><li><a href="examples/index.html" style="font-weight:bold">Code Examples</a></li><li><a href="guidelines.html">XML Guidelines</a></li><li><a href="tutorial/index.html">Tutorial</a></li><li><a href="xmlreader.html">The Reader Interface</a></li><li><a href="ChangeLog.html">ChangeLog</a></li><li><a href="XSLT.html">XSLT</a></li><li><a href="python.html">Python and bindings</a></li><li><a href="architecture.html">libxml2 architecture</a></li><li><a href="tree.html">The tree output</a></li><li><a href="interface.html">The SAX interface</a></li><li><a href="xmlmem.html">Memory Management</a></li><li><a href="xmlio.html">I/O Interfaces</a></li><li><a href="library.html">The parser interfaces</a></li><li><a href="entities.html">Entities or no entities</a></li><li><a href="namespaces.html">Namespaces</a></li><li><a href="upgrade.html">Upgrading 1.x code</a></li><li><a href="threads.html">Thread safety</a></li><li><a href="DOM.html">DOM Principles</a></li><li><a href="example.html">A real example</a></li><li><a href="xml.html">flat page</a>, <a href="site.xsl">stylesheet</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>API Indexes</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="APIchunk0.html">Alphabetic</a></li><li><a href="APIconstructors.html">Constructors</a></li><li><a href="APIfunctions.html">Functions/Types</a></li><li><a href="APIfiles.html">Modules</a></li><li><a href="APIsymbols.html">Symbols</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="http://mail.gnome.org/archives/xml/">Mail archive</a></li><li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li><li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li><li><a href="http://www.aleksey.com/xmlsec/">XML-DSig xmlsec</a></li><li><a href="ftp://xmlsoft.org/">FTP</a></li><li><a href="http://www.zlatkovic.com/projects/libxml/">Windows binaries</a></li><li><a href="http://www.blastwave.org/packages.php/libxml2">Solaris binaries</a></li><li><a href="http://www.explain.com.au/oss/libxml2xslt.html">MacOsX binaries</a></li><li><a href="http://libxmlplusplus.sourceforge.net/">C++ bindings</a></li><li><a href="http://www.zend.com/php5/articles/php5-xmlphp.php#Heading4">PHP bindings</a></li><li><a href="http://sourceforge.net/projects/libxml2-pas/">Pascal bindings</a></li><li><a href="http://libxml.rubyforge.org/">Ruby bindings</a></li><li><a href="http://tclxml.sourceforge.net/">Tcl bindings</a></li><li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml2">Bug Tracker</a></li></ul></td></tr></table></td></tr></table></td><td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"><p>Table of Content:</p><ol><li><a href="#General1">General overview</a></li>
  <li><a href="#basic">The basic buffer type</a></li>
  <li><a href="#Input">Input I/O handlers</a></li>
  <li><a href="#Output">Output I/O handlers</a></li>
  <li><a href="#entities">The entities loader</a></li>
  <li><a href="#Example2">Example of customized I/O</a></li>
</ol><h3><a name="General1" id="General1">General overview</a></h3><p>The module <code><a href="http://xmlsoft.org/html/libxml-xmlio.html">xmlIO.h</a></code>providestheinterfaces
to the libxml2 I/O system. This consists of 4 main parts:</p><ul><li>Entities loader, this is a routine which tries to fetch
    theentities(files) based on their PUBLIC and SYSTEM identifiers. The
    defaultloaderdon't look at the public identifier since libxml2 do not
    maintainacatalog. You can redefine you own entity loader
    byusing<code>xmlGetExternalEntityLoader()</code>and<code>xmlSetExternalEntityLoader()</code>.<a href="#entities">Check theexample</a>.</li>
  <li>Input I/O buffers which are a commodity structure used by
    theparser(s)input layer to handle fetching the informations to feed
    theparser. Thisprovides buffering and is also a placeholder where
    theencodingconverters to UTF8 are piggy-backed.</li>
  <li>Output I/O buffers are similar to the Input ones and fulfillsimilartask
    but when generating a serialization from a tree.</li>
  <li>A mechanism to register sets of I/O callbacks and associate
    themwithspecific naming schemes like the protocol part of the URIs.
    <p>This affect the default I/O operations and allows to use
    specificI/Ohandlers for certain names.</p>
  </li>
</ul><p>The general mechanism used when loading
http://rpmfind.net/xml.htmlforexample in the HTML parser is the following:</p><ol><li>The default entity loader
    calls<code>xmlNewInputFromFile()</code>withthe parsing context and the
    URIstring.</li>
  <li>the URI string is checked against the existing registered
    handlersusingtheir match() callback function, if the HTTP module was
    compiledin, it isregistered and its match() function will succeeds</li>
  <li>the open() function of the handler is called and if
    successfulwillreturn an I/O Input buffer</li>
  <li>the parser will the start reading from this buffer
    andprogressivelyfetch information from the resource, calling the
    read()function of thehandler until the resource is exhausted</li>
  <li>if an encoding change is detected it will be installed on
    theinputbuffer, providing buffering and efficient use of
    theconversionroutines</li>
  <li>once the parser has finished, the close() function of the
    handleriscalled once and the Input buffer and associated
    resourcesaredeallocated.</li>
</ol><p>The user defined callbacks are checked first to allow overriding
ofthedefault libxml2 I/O routines.</p><h3><a name="basic" id="basic">The basic buffer type</a></h3><p>All the buffer manipulation handling is done
usingthe<code>xmlBuffer</code>type define in <code><a href="http://xmlsoft.org/html/libxml-tree.html">tree.h</a></code>which
isaresizable memory buffer. The buffer allocation strategy can be selected
tobeeither best-fit or use an exponential doubling one (CPU vs.
memoryusetrade-off). The values
are<code>XML_BUFFER_ALLOC_EXACT</code>and<code>XML_BUFFER_ALLOC_DOUBLEIT</code>,and
can be set individually or on asystem wide basis
using<code>xmlBufferSetAllocationScheme()</code>. A numberof functions allows
tomanipulate buffers with names starting
withthe<code>xmlBuffer...</code>prefix.</p><h3><a name="Input" id="Input">Input I/O handlers</a></h3><p>An Input I/O handler is a
simplestructure<code>xmlParserInputBuffer</code>containing a context
associated totheresource (file descriptor, or pointer to a protocol handler),
the read()andclose() callbacks to use and an xmlBuffer. And extra xmlBuffer
and acharsetencoding handler are also present to support charset
conversionwhenneeded.</p><h3><a name="Output" id="Output">Output I/O handlers</a></h3><p>An Output handler <code>xmlOutputBuffer</code>is completely similar
toanInput one except the callbacks are write() and close().</p><h3><a name="entities" id="entities">The entities loader</a></h3><p>The entity loader resolves requests for new entities and create
inputsforthe parser. Creating an input from a filename or an URI string
isdonethrough the xmlNewInputFromFile() routine.  The default entity loader
donothandle the PUBLIC identifier associated with an entity (if any). So
itjustcalls xmlNewInputFromFile() with the SYSTEM identifier (which
ismandatory inXML).</p><p>If you want to hook up a catalog mechanism then you simply need
tooverridethe default entity loader, here is an example:</p><pre>#include &lt;libxml/xmlIO.h&gt;

xmlExternalEntityLoader defaultLoader = NULL;

xmlParserInputPtr
xmlMyExternalEntityLoader(const char *URL, const char *ID,
                               xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr ret;
    const char *fileID = NULL;
    /* lookup for the fileID depending on ID */

    ret = xmlNewInputFromFile(ctxt, fileID);
    if (ret != NULL)
        return(ret);
    if (defaultLoader != NULL)
        ret = defaultLoader(URL, ID, ctxt);
    return(ret);
}

int main(..) {
    ...

    /*
     * Install our own entity loader
     */
    defaultLoader = xmlGetExternalEntityLoader();
    xmlSetExternalEntityLoader(xmlMyExternalEntityLoader);

    ...
}</pre><h3><a name="Example2" id="Example2">Example of customized I/O</a></h3><p>This example come from <a href="http://xmlsoft.org/messages/0708.html">areal use case</a>,xmlDocDump()
closes the FILE * passed by the applicationand this was aproblem. The <a href="http://xmlsoft.org/messages/0711.html">solution</a>wasto redefine anew
output handler with the closing call deactivated:</p><ol><li>First define a new I/O output allocator where the output don't
    closethefile:
    <pre>xmlOutputBufferPtr
xmlOutputBufferCreateOwn(FILE *file, xmlCharEncodingHandlerPtr encoder) {
    xmlOutputBufferPtr ret;
    
    if (xmlOutputCallbackInitialized == 0)
        xmlRegisterDefaultOutputCallbacks();

    if (file == NULL) return(NULL);
    ret = xmlAllocOutputBuffer(encoder);
    if (ret != NULL) {
        ret-&gt;context = file;
        ret-&gt;writecallback = xmlFileWrite;
        ret-&gt;closecallback = NULL;  /* No close callback */
    }
    return(ret);
} </pre>
  </li>
  <li>And then use it to save the document:
    <pre>FILE *f;
xmlOutputBufferPtr output;
xmlDocPtr doc;
int res;

f = ...
doc = ....

output = xmlOutputBufferCreateOwn(f, NULL);
res = xmlSaveFileTo(output, doc, NULL);
    </pre>
  </li>
</ol><p><a href="bugs.html">Daniel Veillard</a></p></td></tr></table></td></tr></table></td></tr></table></td></tr></table></td></tr></table></body></html>