{ This file is part of the Free Component Library HTML writing routines Copyright (c) 2000-2002 by Areca Systems GmbH / Sebastian Guenther, sg@freepascal.org See the file COPYING.FPC, included in this distribution, for details about the copyright. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. A temporarily variant of htmwrite that is forced to output 8859 instead of utf-8. We use this for the documentation tools because the textmode IDE and other older systems don't like utf-8. **********************************************************************} unit HTMWrite8859; {$MODE objfpc} {$H+} interface uses Classes, DOM; procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String); procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text); procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream); procedure WriteHTML(Element: TDOMNode; const AFileName: String); procedure WriteHTML(Element: TDOMNode; var AFile: Text); procedure WriteHTML(Element: TDOMNode; AStream: TStream); // =================================================================== implementation uses SysUtils, HTMLDefs; type TSpecialCharCallback = procedure(c: WideChar) of object; THTMLWriter = class(TObject) private FInsideTextNode: Boolean; FBuffer: PChar; FBufPos: PChar; FCapacity: Integer; FLineBreak: string; procedure wrtChars(Src: PWideChar; Length: Integer); procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF} procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF} procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF} procedure wrtQuotedLiteral(const ws: WideString); procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar; const SpecialCharCallback: TSpecialCharCallback); procedure AttrSpecialCharCallback(c: WideChar); procedure TextNodeSpecialCharCallback(c: WideChar); protected procedure Write(const Buffer; Count: Longint); virtual; abstract; procedure WriteNode(Node: TDOMNode); procedure VisitDocument(Node: TDOMNode); procedure VisitElement(Node: TDOMNode); procedure VisitText(Node: TDOMNode); procedure VisitCDATA(Node: TDOMNode); procedure VisitComment(Node: TDOMNode); procedure VisitFragment(Node: TDOMNode); procedure VisitAttribute(Node: TDOMNode); procedure VisitEntityRef(Node: TDOMNode); procedure VisitDocumentType(Node: TDOMNode); procedure VisitPI(Node: TDOMNode); public constructor Create; destructor Destroy; override; end; TTextHTMLWriter = Class(THTMLWriter) Private F : ^Text; Protected Procedure Write(Const Buffer; Count : Longint);override; Public constructor Create(var AFile: Text); end; TStreamHTMLWriter = Class(THTMLWriter) Private F : TStream; Protected Procedure Write(Const Buffer; Count : Longint);override; Public constructor Create(AStream: TStream); end; { --------------------------------------------------------------------- TTextHTMLWriter ---------------------------------------------------------------------} constructor TTextHTMLWriter.Create(var AFile: Text); begin inherited Create; f := @AFile; end; procedure TTextHTMLWriter.Write(const Buffer; Count: Longint); var s: string; begin if Count>0 then begin SetString(s, PChar(@Buffer), Count); system.Write(f^, s); end; end; { --------------------------------------------------------------------- TStreamHTMLWriter ---------------------------------------------------------------------} constructor TStreamHTMLWriter.Create(AStream: TStream); begin inherited Create; F := AStream; end; procedure TStreamHTMLWriter.Write(const Buffer; Count: Longint); begin if Count > 0 then F.Write(Buffer, Count); end; { --------------------------------------------------------------------- THTMLWriter ---------------------------------------------------------------------} constructor THTMLWriter.Create; begin inherited Create; // some overhead - always be able to write at least one extra UCS4 FBuffer := AllocMem(512+32); FBufPos := FBuffer; FCapacity := 512; // Later on, this may be put under user control // for now, take OS setting FLineBreak := sLineBreak; end; destructor THTMLWriter.Destroy; begin if FBufPos > FBuffer then write(FBuffer^, FBufPos-FBuffer); FreeMem(FBuffer); inherited Destroy; end; procedure THTMLWriter.wrtChars(Src: PWideChar; Length: Integer); var pb: PChar; wc: Cardinal; SrcEnd: PWideChar; dummy : string[10]; i : integer; begin pb := FBufPos; SrcEnd := Src + Length; while Src < SrcEnd do begin if pb >= @FBuffer[FCapacity] then begin write(FBuffer^, FCapacity); Dec(pb, FCapacity); if pb > FBuffer then Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer); end; wc := Cardinal(Src^); Inc(Src); case wc of $0A: pb := StrECopy(pb, PChar(FLineBreak)); $0D: begin pb := StrECopy(pb, PChar(FLineBreak)); if (Src < SrcEnd) and (Src^ = #$0A) then Inc(Src); end; 0..$09, $0B, $0C, $0E..$7F: begin pb^ := char(wc); Inc(pb); end; $80..$ffff: begin pb^:='&'; inc(pb); pb^:='#'; inc(pb); str(wc,dummy); for i:=1 to System.length(dummy) do begin pb^:=dummy[i]; inc(pb); end; pb^:=';'; inc(pb); end; end; end; FBufPos := pb; end; procedure THTMLWriter.wrtStr(const ws: WideString); { inline } begin wrtChars(PWideChar(ws), Length(ws)); end; { No checks here - buffer always has 32 extra bytes } procedure THTMLWriter.wrtChr(c: WideChar); { inline } begin FBufPos^ := char(ord(c)); Inc(FBufPos); end; procedure THTMLWriter.wrtIndent; { inline } begin wrtChars(#10, 1); end; procedure THTMLWriter.wrtQuotedLiteral(const ws: WideString); var Quote: WideChar; begin // TODO: need to check if the string also contains single quote // both quotes present is a error if Pos('"', ws) > 0 then Quote := '''' else Quote := '"'; wrtChr(Quote); wrtStr(ws); wrtChr(Quote); end; const AttrSpecialChars = ['<', '"', '&']; TextSpecialChars = ['<', '>', '&']; procedure THTMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar; const SpecialCharCallback: TSpecialCharCallback); var StartPos, EndPos: Integer; begin StartPos := 1; EndPos := 1; while EndPos <= Length(s) do begin if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then begin wrtChars(@s[StartPos], EndPos - StartPos); SpecialCharCallback(s[EndPos]); StartPos := EndPos + 1; end; Inc(EndPos); end; if StartPos <= length(s) then wrtChars(@s[StartPos], EndPos - StartPos); end; const QuotStr = '"'; AmpStr = '&'; ltStr = '<'; gtStr = '>'; procedure THTMLWriter.AttrSpecialCharCallback(c: WideChar); begin case c of '"': wrtStr(QuotStr); '&': wrtStr(AmpStr); '<': wrtStr(ltStr); else wrtChr(c); end; end; procedure THTMLWriter.TextnodeSpecialCharCallback(c: WideChar); begin case c of '<': wrtStr(ltStr); '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional '&': wrtStr(AmpStr); else wrtChr(c); end; end; procedure THTMLWriter.WriteNode(node: TDOMNode); begin case node.NodeType of ELEMENT_NODE: VisitElement(node); ATTRIBUTE_NODE: VisitAttribute(node); TEXT_NODE: VisitText(node); CDATA_SECTION_NODE: VisitCDATA(node); ENTITY_REFERENCE_NODE: VisitEntityRef(node); PROCESSING_INSTRUCTION_NODE: VisitPI(node); COMMENT_NODE: VisitComment(node); DOCUMENT_NODE: VisitDocument(node); DOCUMENT_TYPE_NODE: VisitDocumentType(node); ENTITY_NODE, DOCUMENT_FRAGMENT_NODE: VisitFragment(node); end; end; procedure THTMLWriter.VisitElement(node: TDOMNode); var i: Integer; child: TDOMNode; SavedInsideTextNode: Boolean; s: string; ElFlags: THTMLElementFlags; j: THTMLElementTag; meta: Boolean; begin if not FInsideTextNode then wrtIndent; meta := False; s := LowerCase(node.NodeName); ElFlags := [efSubelementContent, efPCDATAContent]; // default flags for j := Low(THTMLElementTag) to High(THTMLElementTag) do if HTMLElementProps[J].Name = s then begin ElFlags := HTMLElementProps[j].Flags; if j = etMeta then meta := True; break; end; wrtChr('<'); wrtStr(TDOMElement(node).TagName); { Force charset label to utf-8, because it is the encoding we actually write } if meta then begin s := TDOMElement(node).GetAttribute('http-equiv'); if SameText(s, 'content-type') then begin wrtStr(' content="text/html; charset=iso8859-1" http-equiv="Content-Type">'); Exit; end; end; if node.HasAttributes then for i := 0 to node.Attributes.Length - 1 do begin child := node.Attributes.Item[i]; VisitAttribute(child); end; wrtChr('>'); Child := node.FirstChild; if Child <> nil then begin SavedInsideTextNode := FInsideTextNode; FInsideTextNode := efPCDATAContent in ElFlags; repeat WriteNode(Child); Child := Child.NextSibling; until Child = nil; FInsideTextNode := SavedInsideTextNode; end; if (not FInsideTextNode) and not (efPCDATAContent in ElFlags) then wrtIndent; if ElFlags * [efSubelementContent, efPCDATAContent] <> [] then begin wrtChars(''); end; end; procedure THTMLWriter.VisitText(node: TDOMNode); begin ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback); end; procedure THTMLWriter.VisitCDATA(node: TDOMNode); begin if not FInsideTextNode then wrtIndent; wrtChars('', 3); end; procedure THTMLWriter.VisitEntityRef(node: TDOMNode); begin wrtChr('&'); wrtStr(node.NodeName); wrtChr(';'); end; procedure THTMLWriter.VisitPI(node: TDOMNode); begin if not FInsideTextNode then wrtIndent; wrtStr(''); end; procedure THTMLWriter.VisitComment(node: TDOMNode); begin if not FInsideTextNode then wrtIndent; wrtChars('', 3); end; procedure THTMLWriter.VisitDocument(node: TDOMNode); var child: TDOMNode; begin child := node.FirstChild; while Assigned(Child) do begin WriteNode(Child); Child := Child.NextSibling; end; wrtChars(#10, 1); end; procedure THTMLWriter.VisitAttribute(Node: TDOMNode); var Child: TDOMNode; begin wrtChr(' '); wrtStr(TDOMAttr(Node).Name); wrtChars('="', 2); Child := Node.FirstChild; while Assigned(Child) do begin case Child.NodeType of ENTITY_REFERENCE_NODE: VisitEntityRef(Child); TEXT_NODE: ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback); end; Child := Child.NextSibling; end; wrtChr('"'); end; procedure THTMLWriter.VisitDocumentType(Node: TDOMNode); begin wrtStr(' '' then begin wrtStr('PUBLIC '); wrtQuotedLiteral(PublicID); if SystemID <> '' then begin wrtChr(' '); wrtQuotedLiteral(SystemID); end; end else if SystemID <> '' then begin wrtStr('SYSTEM '); wrtQuotedLiteral(SystemID); end; end; wrtChr('>'); end; procedure THTMLWriter.VisitFragment(Node: TDOMNode); var Child: TDOMNode; begin // Fragment itself should not be written, only its children should... Child := Node.FirstChild; while Assigned(Child) do begin WriteNode(Child); Child := Child.NextSibling; end; end; // ------------------------------------------------------------------- // Interface implementation // ------------------------------------------------------------------- procedure WriteHTMLFile(doc: TXMLDocument; const AFileName: String); var fs: TFileStream; begin fs := TFileStream.Create(AFileName, fmCreate); try WriteHTMLFile(doc, fs); finally fs.Free; end; end; procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text); begin with TTextHTMLWriter.Create(AFile) do try WriteNode(doc); finally Free; end; end; procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream); begin with TStreamHTMLWriter.Create(AStream) do try WriteNode(doc); finally Free; end; end; procedure WriteHTML(Element: TDOMNode; const AFileName: String); begin WriteHTMLFile(TXMLDocument(Element), AFileName); end; procedure WriteHTML(Element: TDOMNode; var AFile: Text); begin WriteHTMLFile(TXMLDocument(Element), AFile); end; procedure WriteHTML(Element: TDOMNode; AStream: TStream); begin WriteHTMLFile(TXMLDocument(Element), AStream); end; end.