{
    $Id: htmldefs.pp,v 1.2 2006/01/03 23:33:23 lukvdl Exp $
    This file is part of the Free Component Library

    HTML definitions and utility functions
    Copyright (c) 2000-2002 by
      Areca Systems GmbH / Sebastian Guenther, sg@freepascal.org

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

unit HTMLDefs;

{$MODE objfpc}
{$H+}

interface

type

  { Aliases for the basic HTML attribute value types. }
  THTMLCData = string;
  THTMLID = string;
  THTMLName = string;
  THTMLIDRef = string;
  THTMLIDRefs = string;
  THTMLNumber = longint;

  THTMLText = THTMLCData;
  THTMLCharsets = THTMLCData;
  THTMLContentTypes = THTMLCData;
  THTMLURI = string;
  THTMLCharacter = char;

  { Enumerated attribute values.  The first member of each type (xxEmpty)
    corresponds to the empty string in the matching HTMLxxx name table
    declared in the const section of this unit. }
  THTMLDir = (dirEmpty,dirLeftToRight,dirRightToLeft);
  THTMLalign = (alEmpty,alleft,alcenter,alright,aljustify,alchar);
  THTMLvalign = (vaEmpty,vatop,vamiddle,vabottom,vabaseline);
  { NOTE: "frlefthandsise" is spelled this way on purpose; it is a public
    identifier and renaming it would break existing callers. }
  THTMLframe = (frEmpty,frvoid,frabove,frbelow,frhsides,frvsides,frlefthandsise,frrighthandside,frbox,frborder);
  THTMLrules = (ruEmpty,runone,rugroups,rurows,rucols,ruall);
  THTMLvaluetype = (vtEmpty,vtdata,vtref,vtobject);
  THTMLshape = (shEmpty,shdefault,shrect,shcircle,shpoly);
  THTMLinputtype = (itEmpty,ittext,itpassword,itcheckbox,itradio,itsubmit,itreset,itfile,ithidden,itimage,itbutton);
  THTMLbuttontype = (btEmpty,btsubmit,btreset,btbutton);

  { The sixteen named colors.  The RGB values are given in brace comments
    rather than line comments, so that the declaration stays valid even if
    line breaks are lost. }
  THTMLColor = (
    clHTMLBlack, clHTMLSilver, clHTMLGray, clHTMLWhite, clHTMLMaroon,
    { #000000    #C0C0C0       #808080     #FFFFFF      #800000 }
    clHTMLRed, clHTMLPurple, clHTMLFuchsia,clHTMLGreen, clHTMLLime, clHTMLOlive,
    { #FF0000  #800080       #FF00FF       #008000      #00FF00     #808000 }
    clHTMLYellow,clHTMLNavy, clHTMLBlue, clHTMLTeal, clHTMLAqua
    { #FFFF00    #000080     #0000FF     #008080     #00FFFF }
  );

  { One member per known attribute.  The order must stay in sync with the
    HTMLAttributeTag name table, which is indexed by this type. }
  THTMLAttributeTag = (
    atabbr, atalink, atacceptcharset, ataccept, ataccesskey, ataction,
    atalign, atalt, atarchive, ataxis, atbackground, atbgcolor, atborder,
    atcellpadding, atcellspacing, atchar, atcharoff, atcharset, atchecked,
    atcite, atclass, atclassid, atclear, atcode, atcodebase, atcodetype,
    atcolor, atcols, atcolspan, atcompact, atcontent, atcoords, atdata,
    atdatetime, atdeclare,atdefer, atdir, atdisabled, atenctype, atface,
    atfor, atframe, atframeborder, atheaders, atheight, athref, athreflang,
    athspace, athttpequiv, atid, atismap, atlabel, atlang, atlink,
    atlongdesc, atmarginheight, atmarginwidth, atmaxlength, atmedia,
    atmethod, atmultiple, atname, atnohref, atnoresize, atnoshade, atnowrap,
    atobject, atonblur, atonchange, atonclick, atondblclick, atonfocus,
    atonkeydown, atonkeypress, atonkeyup, atonload, atonmousedown,
    atonmousemove, atonmouseout, atonmouseover, atonmouseup, atonreset,
    atonselect, atonsubmit, atonunload, atprofile, atprompt, atreadonly,
    atrel, atrev, atrows, atrowspan, atrules, atscheme, atscope,
    atscrolling, atselected, atshape, atsize, atspan, atsrc, atstandby,
    atstart, atstyle, atsummary, attabindex, attarget, attext, attitle,
    attype, atusemap, atvalign, atvalue, atvaluetype, atversion, atvlink,
    atvspace, atwidth
  );
  THTMLAttributeSet = set of THTMLAttributeTag;

  { One member per known element, followed by the two pseudo tags etText
    and etUnknown.  The order must stay in sync with the HTMLElementProps
    table, which is indexed by this type. }
  THTMLElementTag = (
    eta, etabbr, etacronym, etaddress, etapplet, etarea, etb, etbase,
    etbasefont, etbdo, etbig, etblockquote, etbody, etbr, etbutton,
    etcaption, etcenter, etcite, etcode, etcol, etcolgroup, etdd, etdel,
    etdfn, etdir, etdiv, etdl, etdt, etem, etfieldset, etfont, etform,
    etframe, etframeset, eth1, eth2, eth3, eth4, eth5, eth6, ethead, ethr,
    ethtml, eti, etiframe, etimg, etinput, etins, etisindex, etkbd, etlabel,
    etlegend, etli, etlink, etmap, etmenu, etmeta, etnoframes, etnoscript,
    etobject, etol, etoptgroup, etoption, etp, etparam, etpre, etq, ets,
    etsamp, etscript, etselect, etsmall, etspan, etstrike, etstrong, etstyle,
    etsub, etsup, ettable, ettbody, ettd, ettextarea, ettfoot, etth, etthead,
    ettitle, ettr, ettt, etu, etul, etvar, etText, etUnknown
  );
  THTMLElementTagSet = set of THTMLElementTag;

{ Per-element flags.  Brace comments are used instead of line comments so
  that the declaration stays valid even if line breaks are lost. }
  THTMLElementFlag = (
    efSubelementContent,                { may have subelements }
    efPCDATAContent,                    { may have PCDATA content }
    efPreserveWhitespace,               { preserve all whitespace }
    efDeprecated,                       { can be dropped in future versions }
    efNoChecks,                         { Checks (attributes,subtags,...) can only be implemented in descendants }
    efEndTagOptional
  );
  THTMLElementFlags = set of THTMLElementFlag;

  { Static description of one element: tag name, content/handling flags and
    the set of attributes listed for it. }
  PHTMLElementProps = ^THTMLElementProps;

  THTMLElementProps = record
    Name: String;
    Flags: THTMLElementFlags;
    Attributes: THTMLAttributeSet;
  end;


const

  { Attribute groups by kind. }
  BooleanAttributes = [atchecked,atdeclare,atdefer,atdisabled,atnohref,atnoresize,
                       atmultiple,atreadonly,atselected];
  DeprecatedAttributes = [atalink, atbackground, atbgcolor, atclear, atcode, atcolor,
                          atcompact, atface, athspace, atlink, atnoshade, atnowrap,
                          atobject, atprompt, atstart, attext, atvlink, atversion, atvspace];

  { Shorthand: element may contain both subelements and PCDATA. }
  efSubcontent = [efSubelementContent, efPCDATAContent];

  { Building blocks used to compose the per-element attribute sets below. }
  atsi18n = [atlang, atdir];
  atscoreattrs = [atid,atclass,atstyle,attitle];
  atsevents = [atonclick,atondblclick,atonmousedown,atonmouseup,atonmouseover,
               atonmousemove,atonmouseout,atonkeypress,atonkeydown,atonkeyup];
  atsattrs = atsevents + atscoreattrs + atsi18n;
  atscellhalign = [atalign, atchar, atcharoff];

{
  etsStructured := [];
  etsDivisions := [];
  etsLists := [];
  etsLinks := [];
  etsObjects := [etImg, etObject, etApplet, etMap, etArea];
  etsForms := [etForm];
  etsText = etsStructured + etsDivisions + etsLists + etsLinks + etsObjects +
            etsForms + etTable + etText + etScript + ;
}

  { Element properties, indexed by THTMLElementTag; the entries must appear
    in exactly the order of that enumeration. }
  HTMLElementProps: array[THTMLElementTag] of THTMLElementProps = (
    (Name: 'a'; Flags: efSubcontent; Attributes: atsattrs+[atcharset,attype,atname,athref,athreflang,atrel,atrev, ataccesskey,atshape,atcoords,attabindex,atonfocus,atonblur]),
    (Name: 'abbr'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'acronym'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'address'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'applet'; Flags: efSubcontent+[efDeprecated]; Attributes:
atscoreattrs+[atcodebase,atarchive,atalt,atname,atwidth,atheight]),
    { The remaining entries follow the order of THTMLElementTag, which
      indexes this table. }
    (Name: 'area'; Flags: []; Attributes: atsattrs+[atshape,atcoords,athref,atnohref,atalt,attabindex, ataccesskey,atonfocus,atonblur]),
    (Name: 'b'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'base'; Flags: []; Attributes: [athref]),
    (Name: 'basefont'; Flags: [efDeprecated]; Attributes: [atid]),
    (Name: 'bdo'; Flags: efSubcontent; Attributes: atscoreattrs+[atlang,atdir]),
    (Name: 'big'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'blockquote';Flags: [efSubelementContent]; Attributes: atsattrs+[atcite]),
    (Name: 'body'; Flags: [efSubelementContent]; Attributes: atsAttrs+[atonload, atonunload]),
    (Name: 'br'; Flags: []; Attributes: atscoreattrs),
    (Name: 'button'; Flags: efSubcontent; Attributes: atsattrs+[atname,atvalue,attype,atdisabled,attabindex, ataccesskey,atonfocus,atonblur]),
    (Name: 'caption'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'center'; Flags: [efSubelementContent,efDeprecated]; Attributes: []),
    (Name: 'cite'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'code'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'col'; Flags: []; Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
    (Name: 'colgroup'; Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
    (Name: 'dd'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
    (Name: 'del'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
    (Name: 'dfn'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'dir'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
    (Name: 'div'; Flags: efSubContent; Attributes: atsattrs),
    (Name: 'dl'; Flags: [efSubelementContent]; Attributes: atsattrs),
    (Name: 'dt'; Flags: [efPCDataContent, efEndTagOptional]; Attributes: atsattrs),
    (Name: 'em'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'fieldset'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'font'; Flags: efSubcontent+[efDeprecated]; Attributes: atscoreattrs+atsi18n),
    (Name: 'form'; Flags: [efSubelementContent]; Attributes: atsattrs+[ataction,atmethod,atenctype,atonsubmit,atonreset,atacceptcharset]),
    (Name: 'frame'; Flags: []; Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder, atmarginwidth,atmarginheight,atnoresize,atscrolling]),
    (Name: 'frameset'; Flags: efSubcontent; Attributes: atsCoreattrs+[atrows,atcols,atonload,atonunload]),
    (Name: 'h1'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'h2'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'h3'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'h4'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'h5'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'h6'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'head'; Flags: [efSubelementContent]; Attributes: atsi18n+[atprofile]),
    (Name: 'hr'; Flags: []; Attributes: atscoreattrs+atsevents),
    (Name: 'html'; Flags: [efSubelementContent]; Attributes: atsi18n),
    (Name: 'i'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'iframe'; Flags: [efSubelementContent]; Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,atmarginwidth, atmarginheight,atscrolling,atalign,atheight,atwidth]),
    (Name: 'img'; Flags: []; Attributes: atsattrs+[atsrc,atalt,atlongdesc,atheight,atwidth,atusemap,atismap]),
    (Name: 'input'; Flags: []; Attributes: atsattrs+[attype,atname,atvalue,atchecked,atdisabled, atreadonly,atsize,atmaxlength,atsrc,atalt,atusemap,attabindex, ataccesskey,atonfocus,atonblur,atonselect,atonchange,ataccept]),
    (Name: 'ins'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
    (Name: 'isindex'; Flags: [efDeprecated]; Attributes: atscoreattrs+atsi18n),
    (Name: 'kbd'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'label'; Flags: efSubcontent; Attributes: atsattrs+[atfor,ataccesskey,atonfocus,atonblur]),
    (Name: 'legend'; Flags: efSubcontent; Attributes: atsattrs+[ataccesskey]),
    (Name: 'li'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
    (Name: 'link'; Flags: []; Attributes: atsattrs+[atcharset,athref,athreflang,attype,atrel,atrev,atmedia]),
    (Name: 'map'; Flags: [efSubelementContent]; Attributes: atsattrs+[atname]),
    (Name: 'menu'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
    (Name: 'meta'; Flags: []; Attributes: atsi18n+[athttpequiv,atname,atcontent,atscheme]),
    (Name: 'noframes'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'noscript'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'object'; Flags: efSubcontent; Attributes: atsattrs+[atdeclare,atclassid,atcodebase,atdata,attype,atcodetype, atarchive,atstandby,atheight,atwidth,atusemap,atname,attabindex]),
    (Name: 'ol'; Flags: [efSubelementContent]; Attributes: atsattrs),
    (Name: 'optgroup'; Flags: efSubcontent; Attributes: atsattrs+[atdisabled,atlabel]),
    (Name: 'option'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs+[atselected,atdisabled,atlabel,atvalue]),
    (Name: 'p'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
    (Name: 'param'; Flags: []; Attributes: [atid,atname,atvalue,atvaluetype,attype]),
    (Name: 'pre'; Flags: efSubcontent + [efPreserveWhitespace]; Attributes: atsattrs),
    (Name: 'q'; Flags: efSubcontent; Attributes: atsattrs+[atcite]),
    (Name: 's'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
    (Name: 'samp'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'script'; Flags: [efPCDATAContent]; Attributes: [atcharset,attype,atsrc,atdefer]),
    (Name: 'select'; Flags: [efSubelementContent]; Attributes: atsattrs+[atname,atsize,atmultiple,atdisabled,attabindex,atonfocus, atonblur,atonchange]),
    (Name: 'small'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'span'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'strike'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
    (Name: 'strong'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'style'; Flags: [efPCDATAContent]; Attributes: atsi18n+[attype,atmedia,attitle]),
    (Name: 'sub'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'sup'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'table'; Flags: [efSubelementContent]; Attributes: atsattrs+[atsummary,atwidth,atborder,atframe,atrules,atcellspacing,atcellpadding]),
    (Name: 'tbody'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
    (Name: 'td'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
    (Name: 'textarea'; Flags: [efPCDATAContent]; Attributes: atsattrs+[atname,atrows,atcols,atdisabled,atreadonly,attabindex, ataccesskey,atonfocus,atonblur,atonselect,atonchange]),
    (Name: 'tfoot'; Flags: [efSubelementContent,efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
    (Name: 'th'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
    (Name: 'thead'; Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
    (Name: 'title'; Flags: efSubcontent; Attributes: atsi18n),
    (Name: 'tr'; Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
    (Name: 'tt'; Flags: efSubcontent; Attributes: atsattrs),
    (Name: 'u'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
    (Name: 'ul'; Flags: [efSubelementContent]; Attributes: atsattrs),
    (Name: 'var'; Flags: efSubcontent; Attributes: atsattrs),
    { Pseudo entries for etText and etUnknown. }
    (Name: 'text'; Flags: efSubcontent; Attributes: []),
    (Name: 'unknown'; Flags: efSubcontent+[efNoChecks]; Attributes: [])
    );

  { Attribute names as written in markup, indexed by THTMLAttributeTag.
    The entries must appear in exactly the order of that enumeration. }
  HTMLAttributeTag : array [THTMLAttributeTag] of String = (
      'abbr', 'alink', 'accept-charset', 'accept', 'accesskey', 'action',
      'align', 'alt', 'archive', 'axis', 'background', 'bgcolor', 'border',
      'cellpadding', 'cellspacing', 'char', 'charoff', 'charset', 'checked',
      'cite', 'class', 'classid', 'clear', 'code', 'codebase', 'codetype',
      'color', 'cols', 'colspan', 'compact', 'content', 'coords', 'data',
      'datetime', 'declare', 'defer', 'dir', 'disabled', 'enctype', 'face',
      'for', 'frame', 'frameborder', 'headers', 'height', 'href', 'hreflang',
      'hspace', 'http-equiv', 'id', 'ismap', 'label', 'lang', 'link',
      'longdesc', 'marginheight', 'marginwidth', 'maxlength', 'media',
      'method', 'multiple', 'name', 'nohref', 'noresize', 'noshade', 'nowrap',
      'object', 'onblur', 'onchange', 'onclick', 'ondblclick', 'onfocus',
      'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmousedown',
      'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onreset',
      'onselect', 'onsubmit', 'onunload', 'profile', 'prompt', 'readonly',
      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scheme', 'scope',
      'scrolling', 'selected', 'shape', 'size', 'span', 'src', 'standby',
      'start', 'style', 'summary', 'tabindex', 'target', 'text', 'title',
      'type', 'usemap', 'valign', 'value', 'valuetype', 'version', 'vlink',
      'vspace', 'width');

  { Textual values of the enumerated attribute types, indexed by the
    matching enumeration.  The xxEmpty member of each type maps to ''. }
  HTMLColor : array [THTMLColor] of string =
    ('Black', 'Silver', 'Gray', 'White', 'Maroon', 'Red', 'Purple', 'Fuchsia',
     'Green', 'Lime', 'Olive', 'Yellow', 'Navy', 'Blue', 'Teal', 'Aqua');
  HTMLDir : array [THTMLDir] of string = ('','LTR','RTL');
  HTMLAlign : array [THTMLalign] of string = ('','left','center','right','justify','char');
  HTMLvalign : array [THTMLvalign] of string = ('','top','middle','bottom','baseline');
  HTMLframe : array [THTMLframe] of string =
    ('','void','above','below','hsides','vsides','lhs','rhs','box','border');
  HTMLrules : array [THTMLrules] of string = ('','none','groups','rows','cols','all');
  HTMLvaluetype : array [THTMLvaluetype] of string = ('','data','ref','object');
  HTMLshape : array [THTMLshape] of string = ('','default','rect','circle','poly');
  HTMLinputtype : array [THTMLinputtype] of string = ('','text','password','checkbox',
    'radio','submit','reset','file','hidden','image','button');
  HTMLbuttontype : array [THTMLbuttontype] of string = ('','submit','reset','button');

{ The declaration that starts here resolves an entity reference: Name is
  either a numeric reference starting with '#' or an entity name; the result
  tells whether Entity was set.  NOTE(review): Name is presumably passed
  without the surrounding '&' and ';' - confirm against callers. }
function
ResolveHTMLEntityReference(const Name: WideString; var Entity: WideChar): Boolean;

{ Returns True when NewTag is listed in the auto-close table as closing
  OldTag. }
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;

implementation

uses SysUtils;

{ Define which elements auto-close other elements, modelled after libxml2.
  This is an array of variable-length lists, each terminated by etUnknown.
  The first element of a list is the newly seen tag; the elements after it
  are the tags it closes.
  Indices to first element of each list are provided by AutoCloseIndex array,
  which *must* be updated after any change.
  The number in front of each list below is its start index. }

const
  AutoCloseTab: array[0..277] of THTMLElementTag = (
    {   0 } etform, etform, etp, ethr, eth1, eth2, eth3, eth4, eth5, eth6,
            etdl, etul, etol, etmenu, etdir, etaddress, etpre, ethead, etUnknown,
    {  19 } ethead, etp, etUnknown,
    {  22 } ettitle, etp, etUnknown,
    {  25 } etbody, ethead, etstyle, etlink, ettitle, etp, etUnknown,
    {  32 } etframeset, ethead, etstyle, etlink, ettitle, etp, etUnknown,
    {  39 } etli, etp, eth1, eth2, eth3, eth4, eth5, eth6, etdl, etaddress,
            etpre, ethead, etli, etUnknown,
    {  53 } ethr, etp, ethead, etUnknown,
    {  57 } eth1, etp, ethead, etUnknown,
    {  61 } eth2, etp, ethead, etUnknown,
    {  65 } eth3, etp, ethead, etUnknown,
    {  69 } eth4, etp, ethead, etUnknown,
    {  73 } eth5, etp, ethead, etUnknown,
    {  77 } eth6, etp, ethead, etUnknown,
    {  81 } etdir, etp, ethead, etUnknown,
    {  85 } etaddress, etp, ethead, etul, etUnknown,
    {  90 } etpre, etp, ethead, etul, etUnknown,
    {  95 } etblockquote, etp, ethead, etUnknown,
    {  99 } etdl, etp, etdt, etmenu, etdir, etaddress, etpre, ethead, etUnknown,
    { 108 } etdt, etp, etmenu, etdir, etaddress, etpre, ethead, etdd, etUnknown,
    { 117 } etdd, etp, etmenu, etdir, etaddress, etpre, ethead, etdt, etUnknown,
    { 126 } etul, etp, ethead, etol, etmenu, etdir, etaddress, etpre, etUnknown,
    { 135 } etol, etp, ethead, etul, etUnknown,
    { 140 } etmenu, etp, ethead, etul, etUnknown,
    { 145 } etp, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etUnknown,
    { 155 } etdiv, etp, ethead, etUnknown,
    { 159 } etnoscript, etp, ethead, etUnknown,
    { 163 } etcenter, etfont, etb, eti, etp, ethead, etUnknown,
    { 170 } eta, eta, etUnknown,
    { 173 } etcaption, etp, etUnknown,
    { 176 } etcolgroup, etcaption, etcolgroup, etcol, etp, etUnknown,
    { 182 } etcol, etcaption, etcol, etp, etUnknown,
    { 187 } ettable, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6,
            etpre, eta, etUnknown,
    { 199 } etth, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
    { 210 } ettd, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
    { 221 } ettr, etth, ettd, ettr, etcaption, etcol, etcolgroup, etp, etUnknown,
    { 230 } etthead, etcaption, etcol, etcolgroup, etUnknown,
    { 235 } ettfoot, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
            ettbody, etp, etUnknown,
    { 246 } ettbody, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
            ettfoot, ettbody, etp, etUnknown,
    { 258 } etoptgroup, etoption, etUnknown,
    { 261 } etoption, etoption, etUnknown,
    { 264 } etfieldset, etlegend, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6,
            etpre, eta, etUnknown,
    { 277 } etUnknown);

  { Start index of each list in AutoCloseTab. }
  AutoCloseIndex: array[0..40] of Integer = (
    0, 19, 22, 25, 32, 39, 53, 57, 61, 65, 69, 73, 77, 81, 85, 90, 95, 99,
    108, 117, 126, 135, 140, 145, 155, 159, 163, 170, 173, 176, 182, 187,
    199, 210, 221, 230, 235, 246, 258, 261, 264
  );

{ HTML entities, each preceded with its code.  An entry consists of two
  characters holding the high and the low byte of the character code,
  followed by the entity name.
  There is a separate list for each entity name length, and each list is
  sorted by entity name in ascending character-code order (uppercase before
  lowercase), because the lists are binary searched.
  The sole purpose of using AnsiString here is staying compatible with
  Delphi 7, which is totally broken with respect to handling wide literals. }

  ent_2 =
    #3#$9C + 'Mu'+ #3#$9D + 'Nu'+ #3#$A0 + 'Pi'+ #3#$9E + 'Xi'+
    #$22#$65+ 'ge'+ #0#62 + 'gt'+ #$22#$64+ 'le'+ #0#60 + 'lt'+
    #3#$BC + 'mu'+ #$22#$60+ 'ne'+ #$22#$0B+ 'ni'+ #3#$BD + 'nu'+
    #$22#$28+ 'or'+ #3#$C0 + 'pi'+ #3#$BE + 'xi';

  ent_3 =
    #3#$A7 + 'Chi'+ #0#208 + 'ETH'+ #3#$97 + 'Eta'+ #3#$A6 + 'Phi'+
    #3#$A8 + 'Psi'+ #3#$A1 + 'Rho'+ #3#$A4 + 'Tau'+ #0#38 + 'amp'+
    #$22#$27+ 'and'+ #$22#$20+ 'ang'+ #$22#$29+ 'cap'+ #3#$C7 + 'chi'+
    #$22#$2A+ 'cup'+ #0#176 + 'deg'+ #3#$B7 + 'eta'+ #0#240 + 'eth'+
    #$22#$2B+ 'int'+ #$25#$CA+ 'loz'+ #$20#$0E+ 'lrm'+ #0#172 + 'not'+
    #3#$C6 + 'phi'+ #3#$D6 + 'piv'+ #3#$C8 + 'psi'+ #0#174 + 'reg'+
    #3#$C1 + 'rho'+ #$20#$0F+ 'rlm'+ #0#173 + 'shy'+ #$22#$3C+ 'sim'+
    #$22#$82+ 'sub'+ #$22#$11+ 'sum'+ #$22#$83+ 'sup'+ #3#$C4 + 'tau'+
    #0#168 + 'uml'+ #0#165 + 'yen'+ #$20#$0D+ 'zwj';

  ent_4 =
    #0#196 + 'Auml'+ #3#$92 + 'Beta'+ #0#203 + 'Euml'+ #3#$99 + 'Iota'+
    #0#207 + 'Iuml'+ #0#214 + 'Ouml'+ #0#220 + 'Uuml'+ #1#$78 + 'Yuml'+
    #3#$96 + 'Zeta'+ #0#228 + 'auml'+ #3#$B2 + 'beta'+ #$20#$22+ 'bull'+
    #0#162 + 'cent'+ #2#$C6 + 'circ'+ #$22#$45+ 'cong'+ #0#169 + 'copy'+
    #$21#$D3+ 'dArr'+ #$21#$93+ 'darr'+ #$20#$03+ 'emsp'+ #$20#$02+ 'ensp'+
    #0#235 + 'euml'+ #$20#$AC+ 'euro'+ #1#$92 + 'fnof'+ #$21#$D4+ 'hArr'+
    #$21#$94+ 'harr'+ #3#$B9 + 'iota'+ #$22#$08+ 'isin'+ #0#239 + 'iuml'+
    #$21#$D0+ 'lArr'+ #$23#$29+ 'lang'+ #$21#$90+ 'larr'+ #0#175 + 'macr'+
    #0#160 + 'nbsp'+ #$22#$84+ 'nsub'+ #0#170 + 'ordf'+ #0#186 + 'ordm'+
    #0#246 + 'ouml'+ #0#182 + 'para'+ #$22#$02+ 'part'+ #$22#$A5+ 'perp'+
    #$22#$0F+ 'prod'+ #$22#$1D+ 'prop'+ #0#34 + 'quot'+ #$21#$D2+ 'rArr'+
    #$23#$2A+ 'rang'+ #$21#$92+ 'rarr'+ #$21#$1C+ 'real'+ #$22#$C5+ 'sdot'+
    #0#167 + 'sect'+ #$22#$86+ 'sube'+ #0#185 + 'sup1'+ #0#178 + 'sup2'+
    #0#179 + 'sup3'+ #$22#$87+ 'supe'+ #$21#$D1+ 'uArr'+ #$21#$91+ 'uarr'+
    #0#252 + 'uuml'+ #0#255 + 'yuml'+ #3#$B6 + 'zeta'+ #$20#$0C+ 'zwnj';

  ent_5 =
    #0#198 + 'AElig'+ #0#194 + 'Acirc'+ #3#$91 + 'Alpha'+ #0#197 + 'Aring'+
    #3#$94 + 'Delta'+ #0#202 + 'Ecirc'+ #3#$93 + 'Gamma'+ #0#206 + 'Icirc'+
    #3#$9A + 'Kappa'+ #1#$52 + 'OElig'+ #0#212 + 'Ocirc'+ #3#$A9 + 'Omega'+
    #$20#$33+ 'Prime'+ #3#$A3 + 'Sigma'+ #0#222 + 'THORN'+ #3#$98 + 'Theta'+
    #0#219 + 'Ucirc'+ #0#226 + 'acirc'+ #0#180 + 'acute'+ #0#230 + 'aelig'+
    #3#$B1 + 'alpha'+ #0#229 + 'aring'+ #$22#$48+ 'asymp'+ #$20#$1E+ 'bdquo'+
    #0#184 + 'cedil'+ #$26#$63+ 'clubs'+ #$21#$B5+ 'crarr'+ #3#$B4 + 'delta'+
    #$26#$66+ 'diams'+ #0#234 + 'ecirc'+ #$22#$05+ 'empty'+ #$22#$61+ 'equiv'+
    #$22#$03+ 'exist'+ #$20#$44+ 'frasl'+ #3#$B3 + 'gamma'+ #0#238 + 'icirc'+
    #0#161 + 'iexcl'+ #$21#$11+ 'image'+ #$22#$1E+ 'infin'+ #3#$BA + 'kappa'+
    #0#171 + 'laquo'+ #$23#$08+ 'lceil'+ #$20#$1C+ 'ldquo'+ #$20#$18+ 'lsquo'+
    #$20#$14+ 'mdash'+ #0#181 + 'micro'+ #$22#$12+ 'minus'+ #$22#$07+ 'nabla'+
    #$20#$13+ 'ndash'+ #$22#$09+ 'notin'+ #0#244 + 'ocirc'+ #1#$53 + 'oelig'+
    #$20#$3E+ 'oline'+ #3#$C9 + 'omega'+ #$22#$95+ 'oplus'+ #0#163 + 'pound'+
    #$20#$32+ 'prime'+ #$22#$1A+ 'radic'+ #0#187 + 'raquo'+ #$23#$09+ 'rceil'+
    #$20#$1D+ 'rdquo'+ #$20#$19+ 'rsquo'+ #$20#$1A+ 'sbquo'+ #3#$C3 + 'sigma'+
    #0#223 + 'szlig'+ #3#$B8 + 'theta'+ #0#254 + 'thorn'+ #2#$DC + 'tilde'+
    #0#215 + 'times'+ #$21#$22+ 'trade'+ #0#251 + 'ucirc'+ #3#$D2 + 'upsih';

  ent_6 =
    #0#193 + 'Aacute'+ #0#192 + 'Agrave'+ #0#195 + 'Atilde'+ #0#199 + 'Ccedil'+
    #$20#$21+ 'Dagger'+ #0#201 + 'Eacute'+ #0#200 + 'Egrave'+ #0#205 + 'Iacute'+
    #0#204 + 'Igrave'+ #3#$9B + 'Lambda'+ #0#209 + 'Ntilde'+ #0#211 + 'Oacute'+
    #0#210 + 'Ograve'+ #0#216 + 'Oslash'+ #0#213 + 'Otilde'+ #1#$60 + 'Scaron'+
    #0#218 + 'Uacute'+ #0#217 + 'Ugrave'+ #0#221 + 'Yacute'+ #0#225 + 'aacute'+
    #0#224 + 'agrave'+ #0#227 + 'atilde'+ #0#166 + 'brvbar'+ #0#231 + 'ccedil'+
    #0#164 + 'curren'+ #$20#$20+ 'dagger'+ #0#247 + 'divide'+ #0#233 + 'eacute'+
    #0#232 + 'egrave'+ #$22#$00+ 'forall'+ #0#189 + 'frac12'+ #0#188 + 'frac14'+
    #0#190 + 'frac34'+ #$26#$65+ 'hearts'+ #$20#$26+ 'hellip'+ #0#237 + 'iacute'+
    #0#236 + 'igrave'+ #0#191 + 'iquest'+ #3#$BB + 'lambda'+ #$23#$0A+ 'lfloor'+
    #$22#$17+
'lowast'+ #$20#$39+ 'lsaquo'+ #0#183 + 'middot'+ #0#241 + 'ntilde'+
    #0#243 + 'oacute'+ #0#242 + 'ograve'+ #0#248 + 'oslash'+ #0#245 + 'otilde'+
    #$22#$97+ 'otimes'+ #$20#$30+ 'permil'+ #0#177 + 'plusmn'+ #$23#$0B+ 'rfloor'+
    #$20#$3A+ 'rsaquo'+ #1#$61 + 'scaron'+ #3#$C2 + 'sigmaf'+ #$26#$60+ 'spades'+
    #$22#$34+ 'there4'+ #$20#$09+ 'thinsp'+ #0#250 + 'uacute'+ #0#249 + 'ugrave'+
    #$21#$18+ 'weierp'+ #0#253 + 'yacute';

  ent_7 =
    #3#$95 + 'Epsilon'+ #3#$9F + 'Omicron'+ #3#$A5 + 'Upsilon'+ #$21#$35+ 'alefsym'+
    #3#$B5 + 'epsilon'+ #3#$BF + 'omicron'+ #3#$C5 + 'upsilon';

  ent_8 =
    #3#$D1 + 'thetasym';

  { Entity lists indexed by entity name length. }
  strs: array[2..8] of string = (
    ent_2, ent_3, ent_4, ent_5, ent_6, ent_7, ent_8
  );

{ Binary search for an entity name in one of the ent_N lists.

  P    - first character of the name to look up; Len characters are read
  Len  - length of the name; every entry of data must hold names of
         exactly this length
  data - list of entries, each made of 2 code bytes (high, low) followed
         by a Len-character name, sorted by name

  Returns the character code of the matching entry, or #0 when the name is
  not in the list. }
function BSearch(P: PWideChar; Len: Integer; const data: string): WideChar;
var
  L, H, mid, J, C, base: Integer;
begin
  Result := #0;
  L := 0;
  { Index of the LAST entry.  The previous bound was the entry count, so a
    name sorting after every entry made mid point one entry past the end
    of data. }
  H := (Length(data) div (Len+2)) - 1;
  while L <= H do
  begin
    mid := L + ((H - L) shr 1);
    base := mid*(Len+2);          { 0-based offset of entry mid in data }
    J := 0;
    { Compare character by character; C keeps the first difference. }
    repeat
      C := ord(P[J]) - ord(data[base+3+J]);
      Inc(J);
    until (C <> 0) or (J >= Len);
    if C > 0 then
      L := mid + 1
    else if C < 0 then
      H := mid - 1
    else
    begin
      { Found: decode the two code bytes stored in front of the name. }
      Result := WideChar((ord(data[base+1]) shl 8) or ord(data[base+2]));
      Exit;
    end;
  end;
end;

{ Remaining issues:
  1) UTF-16 surrogate pairs: Entity is a single WideChar, so numeric
     references above $FFFF cannot be returned and are rejected.
  2) HTML accepts uppercase 'X' for hex notation, but XML does not.
  3) 'apos' is used in xml/xhtml, but not in HTML 4.01
}

{ Resolves an entity reference.

  Name   - either a numeric reference ('#' followed by decimal digits, or
           '#x' / '#X' followed by hex digits) or an entity name of 2 to 8
           characters that is looked up in the ent_N lists
  Entity - receives the character on success.  It is left untouched when a
           numeric reference is rejected and set to #0 when a name is not
           found.

  Returns True when the reference was resolved.  A numeric reference is
  rejected when it has no digits, contains a character that is not a digit
  of its base, or denotes a value above $FFFF. }
function ResolveHTMLEntityReference(const Name: WideString; var Entity: WideChar): Boolean;
const
  MaxCode = $FFFF;                { largest value a single WideChar can hold }
var
  i, L, First, Base, Digit: Integer;
  value: Integer;
begin
  L := Length(Name);
  if (L > 1) and (Name[1] = '#') then
  begin
    if (Name[2] = 'x') or (Name[2] = 'X') then
    begin
      Base := 16;
      First := 3;
    end
    else
    begin
      Base := 10;
      First := 2;
    end;

    value := 0;
    i := First;
    while i <= L do
    begin
      case Name[i] of
        '0'..'9': Digit := Ord(Name[i]) - Ord('0');
        'a'..'f': Digit := Ord(Name[i]) - (Ord('a') - 10);
        'A'..'F': Digit := Ord(Name[i]) - (Ord('A') - 10);
      else
        Digit := Base;            { not a digit in any base }
      end;
      if Digit >= Base then
        Break;
      value := value * Base + Digit;
      { Stop before value can overflow; such a reference cannot be
        represented in a WideChar anyway. }
      if value > MaxCode then
        Break;
      Inc(i);
    end;

    { Success only if every character was consumed and there was at least
      one digit. }
    Result := (i = L+1) and (i > First);
    if Result then
      Entity := WideChar(value);
  end
  else
  begin
    case L of
      2..8: Entity := BSearch(PWideChar(Name), L, strs[L]);
    else
      Entity := #0;
    end;
    Result := (Entity <> #0);
  end;
end;

{ Returns True when NewTag is listed in AutoCloseTab as closing OldTag.
  Only the first list whose head equals NewTag is inspected. }
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
var
  i, j: Integer;
begin
  Result := False;
  for i := 0 to high(AutoCloseIndex) do
    if NewTag = AutoCloseTab[AutoCloseIndex[i]] then
    begin
      { Walk the list that follows its head, up to the etUnknown terminator. }
      j := AutoCloseIndex[i]+1;
      while AutoCloseTab[j] <> etUnknown do
      begin
        if AutoCloseTab[j] = OldTag then
        begin
          Result := True;
          Exit;
        end;
        Inc(j);
      end;
      Exit;
    end;
end;

end.