{ Copyright (C) <2005> htmlutil.pas This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. } { See the file COPYING.FPC, included in this distribution, for details about the copyright. } { modified from jsFastHtmlParser for use with freepascal Original Author: James Azarja Contributor: Lars aka L505 http://z505.com Note: this isn't perfect, it needs to be improved.. see comments } unit HTMLUtil; {$ifdef fpc} {$MODE Delphi} {$H+}{$endif} interface uses SysUtils, strutils; { most commonly used } function GetVal(tag, attribname_ci: string): string; function GetTagName(Tag: string): string; { less commonly used, but useful } function GetUpTagName(tag: string): string; function GetNameValPair(tag, attribname_ci: string): string; function GetValFromNameVal(namevalpair: string): string; { old buggy code} function GetVal_JAMES(tag, attribname_ci: string): string; function GetNameValPair_JAMES(tag, attribname_ci: string): string; { rarely needed NAME= case sensitivity } function GetNameValPair_cs(tag, attribname: string): string; implementation function CopyBuffer(StartIndex: PChar; Len: integer): string; var s : String; begin SetLength(s, Len); StrLCopy(@s[1], StartIndex, Len); result:= s; end; { Return tag name, case preserved } function GetTagName(Tag: string): string; var P : Pchar; S : Pchar; begin P := Pchar(Tag); while P^ in ['<',' ',#9] do inc(P); S := P; while Not (P^ in [' ','>',#0]) do inc(P); if P > S then Result := CopyBuffer( S, P-S) else Result := ''; end; { Return tag name in uppercase } function GetUpTagName(tag: string): string; var P : Pchar; S : Pchar; begin P := Pchar(uppercase(Tag)); while P^ in ['<',' ',#9] do inc(P); S := P; while Not (P^ in [' ','>',#0]) do inc(P); if P > S then Result := CopyBuffer( S, P-S) else Result := ''; end; { Return name=value pair ignoring case of NAME, preserving case of VALUE Lars' fixed version } function GetNameValPair(tag, attribname_ci: string): string; var P : Pchar; S : Pchar; UpperTag, UpperAttrib : string; Start: integer; L : integer; C : char; begin // must be space before case insensitive NAME, i.e. nil then begin inc(S); // skip space P:= S; // Skip while not (P^ in ['=', ' ', '>', #0]) do inc(P); if (P^ = '=') then inc(P); while not (P^ in [' ','>',#0]) do begin if (P^ in ['"','''']) then begin C:= P^; inc(P); { Skip quote } end else C:= ' '; { thanks to Dmitry [mail@vader.ru] } while not (P^ in [C, '>', #0]) do Inc(P); if (P^ <> '>') then inc(P); { Skip current character, except '>' } break; end; L:= P - S; Start:= S - Pchar(UpperTag); P:= Pchar(Tag); S:= P; inc(S, Start); result:= CopyBuffer(S, L); end; end; { Get value of attribute, e.g WIDTH=36 -return-> 36, preserves case sensitive } function GetValFromNameVal(namevalpair: string): string; var P: Pchar; S: Pchar; C: Char; begin P:= Pchar(namevalpair); S:= StrPos(P, '='); if S <> nil then begin inc(S); // skip equal P:= S; // set P to a character after = if (P^ in ['"','''']) then begin C:= P^; Inc(P); { Skip current character } end else C:= ' '; S:= P; while not (P^ in [C, #0]) do inc(P); if (P <> S) then { Thanks to Dave Keighan (keighand@yahoo.com) } Result:= CopyBuffer(S, P - S) else Result:= ''; end; end; { return value of an attribute (attribname_ci), case ignored for NAME portion, but return value case is preserved } function GetVal(tag, attribname_ci: string): string; var namevalpair: string; begin // returns full name=value pair namevalpair:= GetNameValPair(tag, attribname_ci); // extracts value portion only result:= GetValFromNameVal(namevalpair); end; { ---------------------------------------------------------------------------- BELOW FUNCTIONS ARE OBSOLETE OR RARELY NEEDED SINCE THEY EITHER CONTAIN BUGS OR THEY ARE TOO CASE SENSITIVE (FOR THE TAG NAME PORTION OF THE ATTRIBUTE } { James old buggy code for testing purposes. Bug: when finding 'ID', function finds "width", even though width <> "id" } function GetNameValPair_JAMES(tag, attribname_ci: string): string; var P : Pchar; S : Pchar; UT, UA : string; Start: integer; L : integer; C : char; begin UA:= Uppercase(attribname_ci); UT:= Uppercase(Tag); P:= Pchar(UT); S:= StrPos(P, Pchar(UA)); if S <> nil then begin P := S; // Skip attribute name while not (P^ in ['=',' ','>',#0]) do inc(P); if (P^ = '=') then inc(P); while not (P^ in [' ','>',#0]) do begin if (P^ in ['"','''']) then begin C:= P^; inc(P); { Skip current character } end else C:= ' '; { thanks to Dmitry [mail@vader.ru] } while not (P^ in [C, '>', #0]) do Inc(P); if (P^ <> '>') then inc(P); { Skip current character, except '>' } break; end; L:= P - S; Start:= S - Pchar(UT); P:= Pchar(Tag); S:= P; inc(S, Start); result:= CopyBuffer(S, L); end; end; { James old buggy code for testing purposes } function GetVal_JAMES(tag, attribname_ci: string): string; var namevalpair: string; begin namevalpair:= GetNameValPair_JAMES(tag, attribname_ci); result:= GetValFromNameVal(namevalpair); end; { return name=value portion, case sensitive, case preserved } function GetNameValPair_cs(Tag, attribname: string): string; var P : Pchar; S : Pchar; C : Char; begin P := Pchar(Tag); S := StrPos(P, Pchar(attribname)); if S<>nil then begin P := S; // Skip attribute name while not (P^ in ['=',' ','>',#0]) do inc(P); if (P^ = '=') then inc(P); while not (P^ in [' ','>',#0]) do begin if (P^ in ['"','''']) then begin C:= P^; inc(P); { Skip current character } end else C:= ' '; { thanks to Dmitry [mail@vader.ru] } while not (P^ in [C, '>', #0]) do inc(P); if (P^<>'>') then inc(P); { Skip current character, except '>' } break; end; if P > S then Result:= CopyBuffer(S, P - S) else Result:= ''; end; end; end. (* alternative, not needed { return value (case preserved) from a name=value pair, ignores case in given NAME= portion } function GetValFromNameVal(namevalpair: string): string; type TAttribPos = record startpos: longword; // start pos of value len: longword; // length of value end; { returns case insensitive start position and length of just the value substring in name=value pair} function ReturnPos(attribute: string): TAttribPos; var P : Pchar; S : Pchar; C : Char; begin result.startpos:= 0; result.len:= 0; P:= Pchar(uppercase(Attribute)); // get substring including and everything after equal S:= StrPos(P, '='); result.startpos:= pos('=', P); if S <> nil then begin inc(S); // set to character after = inc(result.startpos); P:= S; if (P^ in ['"','''']) then begin C:= P^; // skip quote inc(P); inc(result.startpos); end else C:= ' '; S:= P; // go to end quote or end of value while not (P^ in [C, #0]) do inc(P); if (P <> S) then begin result.len:= p - s; end; end; end; var found: TAttribPos; begin found:= ReturnPos(namevalpair); // extract using coordinates result:= MidStr(namevalpair, found.startpos, found.len); end; *)