unit relinkhtml; // hackety script using DOM_HTML to fix the URLs in FPC doc. // Note: needs dom fixes in fcl-xml (2.3.1 of after july 2009) // // Warning: looking at this code might damage your eyes! // interface {$ifdef fpc} {$mode delphi} {$endif} {$define debugoutput} {define printattr} {define printchildren} uses typinfo,classes, dom,SAX_Html,dom_html,xmlutils,htmwrite8859; Type TNavButton = (NavNone,NavNext,NavUp,NavTail,NavPrev,NavPrevTail,navfront ); TNavArray = array [TNavButton] of TDomNode; THtmlDocFile = class // container object for file read from disk public title, // title of paragraph/section redtitle : string; // title with some substitutions done (appendix/chapter removed) Filename:string; // html filename. dom : THTMLDocument; numbers: array[0..4] of string; // doc hierachy splitted on the dots. ('1','1','1','' = section 1.1.1 etc) padnumbers: array[0..4] of string; // same as numbers but padded with zeroes to 5 chars. for ordering purposes. children : TStringlist; // child nodes navtable : array[0..1] of TNavArray; // nodes in navigation table. 0=top, 1=bottom Localnavtable: array[TNavButton] of THtmldocfile; // Calculated up/prev/next constructor create; procedure read(filenamewithpath,fn:string); procedure findtitle; procedure parsetitle; procedure scan_nav_table(scanforward:boolean;var navtab:TNavArray); function walk(prev,up:THtmlDocFile):THtmlDocFile; function walkback(next:THtmlDocFile):THtmlDocFile; function scantablenode(root:tdomnode;var resnode:Tdomnode):TNavButton; procedure navcheck; procedure patch; end; TIndexes = class // Collection of files + indexes. indexes: array[0..4] of TStringlist; // files sorted according to group htmls : TStringList; // all files. prefix :string; // prog, user etc. set before running procedure walk; procedure walkback; constructor create; procedure readfiles(basedir:string); procedure linkfiles; procedure writehtmls(basedir:string); procedure patch; property chapters :Tstringlist read indexes[0]; property sections :Tstringlist read indexes[1]; property subsections :Tstringlist read indexes[2]; // property appendices :Tstringlist read indexes[5]; property toc :Tstringlist read indexes[3]; property rest :Tstringlist read indexes[4]; end; const NavButtonCaptions : array[TNavButton] of String =('NONE!','next','up','tail','prev','prev-tail','front'); function A_NodeToURL(node:tdomnode):String; implementation uses sysutils,strutils; // Helper funcs. function A_NodeToURL(node:tdomnode):String; var n :TDomNode; begin result:=''; if not assigned(node.Attributes) then exit; n:=node.attributes.GetNamedItem('href'); if assigned(n) then result:=n.NodeValue; end; procedure setnode_tourl(node:TDomNode;value:string); var n :TDomNode; begin if not assigned(node.Attributes) then exit; n:=node.attributes.GetNamedItem('href'); if assigned(n) then n.NodeValue:=value; assert(n.NodeValue=value); end; procedure printattributes(node:TDomNode;title:string=''); var i :integer; Attributes:TDOMNamedNodeMap; begin if assigned(node) then begin Attributes:=node.Attributes; write(title,' '); if assigned(attributes) and (attributes.length>0) then for i := 0 to Attributes.Length - 1 do write(Attributes.item[i].nodevalue,' '); writeln; end; end; function searchtag(prnt:TDomNode;tag:string):TDomNode; // Seach first matching tag in siblings var chld: TDomNode; begin result:=nil; if assigned(prnt ) then begin chld:=prnt.firstchild; while assigned(chld) do begin if (chld is TDomElement) then begin if (TDomElement(chld).tagname=tag) then begin result:=chld; exit; end; end; chld:=chld.nextsibling; end; end; end; function searchtagback(prnt:TDomNode;tag:string):TDomNode; // Seach last matching tag in siblings var chld: TDomNode; begin result:=nil; if assigned(prnt ) then begin chld:=prnt.LastChild; while assigned(chld) do begin if (chld is TDomElement) then begin if (TDomElement(chld).tagname=tag) then begin result:=chld; exit; end; end; chld:=chld.PreviousSibling; end; end; end; function reducefilename(name:string):string; var i,j : integer; begin i:=1; j:=length(name); {while not (name[i] in ['0'..'9']) do inc(i); } result:=ChangeFileExt(copy(name,i,j-i+1),''); while length(result)<5 do result:='0'+result; end; function padzero(s:string):string; begin result:=s; while length(result)<5 do result:='0'+result; end; procedure printchildren(root:tdomnode); var node :TDomNode; children:TDOMNodeList; i: integer; begin children:=root.ChildNodes; for i:= 0 to children.Length- 1 do begin node:=children.Item[i]; write(node.classname,'"',node.NodeName,'" ',node.Nodevalue,' - ' ); end; writeln; end; { THtmlDocFile } constructor THtmlDocFile.create; begin fillchar(navtable,sizeof(navtable),#0); dom:=THTMLDocument.create; children:=TStringList.Create; children.Sorted:=true; end; procedure THtmlDocFile.findtitle; // Tries to find object(section,chapter etc) true title. Not necessarily title tag. procedure trytitle(node: TDomNode); // finds most section's title. var span,txt: TDomNode; begin if assigned(node) then begin span:=searchtag(node,'span'); if assigned(span) then begin txt:=span.firstchild; while assigned(txt) do begin if txt is TDomText then title:=title+trim(tDomText(txt).nodevalue); txt:=txt.nextsibling; end; end; end; end; procedure examinenode(txt:TDomNode;var title:ansistring); var s:ansistring; begin while assigned(txt) do begin if txt is TDomText then begin s:=trim(tDomText(txt).nodevalue); s:=ansireplacestr(s,chr(160),' '); title:=trim(title)+' '+s; end else examinenode(txt.firstchild,title); txt:=txt.nextsibling; end; end; procedure try_hx_a(nr:integer); // finds some sections with clickable titles var node: TDomNode; span,txt: TDomNode; begin node:=searchtag(dom.body,'h'+inttostr(nr)); if assigned(node) then begin examinenode(node.firstchild,title); (* span:=searchtag(node,'a'); // printchildren(span); if assigned(span) then begin examinenode(span,title); printchildren(span); // examinenode(span.firstchild,title); end; *) end; end; var node: TDomNode; l, i : integer; begin title:=''; for i := 1 to 4 do begin if title='' then begin node:=searchtag(dom.body,'h'+inttostr(i)); trytitle(node); end; end; // alternate locations: if title='' then try_hx_a(2); if title='' then try_hx_a(3); if title='' then try_hx_a(4); {$ifdef debugtitle} if title='' then writeln('no title:',filename) else writeln('title:',title,' in ',filename); {$endif} // strip appendix,chapter from title for easy indexing. redtitle:=title; if ansiStartsText('Chapter',redtitle) then delete(redtitle,1,8); if ansiStartsText('Appendix',redtitle) then delete(redtitle,1,9); l:=length(redtitle); redtitle:=trim(redtitle); i:=1; while (il then dec(i); setlength(redtitle,i); {$ifdef debugtitle} writeln('redtitle:"',redtitle,'"'); {$endif} end; procedure THtmlDocFile.navcheck; //unused debugproc to do a sanity check if top and bottom nodes don't match var isfirst:boolean; n :TNavButton; begin isfirst:=true; for n := low(n) to high(n) do if assigned(navtable[0][n]) and assigned(navtable[1][n]) and (navtable[0][n]<>navtable[1][n]) then begin if isfirst then write(':',title,' '); write(getenumname(typeinfo(tnavbutton),ord(n)),' ' ); isfirst:=false; end; if not isfirst then writeln; end; procedure THtmlDocFile.parsetitle; var i,j,k : Integer; s2:ansistring; code : longint; begin i:=1; j:=length(title); k:=0; if title<>'' then while (i<=j) and (k<=high(numbers)) do begin s2:=''; while (i<=j) and (title[i] in ['0'..'9','A'..'Z']) do begin s2:=s2+title[i]; inc(i); end; if length(s2)>0 then begin numbers[k]:=s2; padnumbers[k]:=padzero(s2); inc(k); // val(s2,numbers[k],code); inc(k); end else i:=j; inc(i); end; end; procedure THtmlDocFile.patch; // tries to patch up/down/next nodes etc. var n : TNavButton; fn : string; begin for n:= succ(low(n)) to high(n) do begin if assigned(Localnavtable[n]) then begin fn:=localnavtable[n].Filename; if assigned(navtable[0][n]) then setnode_tourl(navtable[0][n],fn); if assigned(navtable[1][n]) then setnode_tourl(navtable[1][n],fn); end; if n=NavTail then begin if assigned(navtable[0][n]) then setnode_tourl(navtable[0][n],'#tail'+filename); if assigned(navtable[1][n]) then setnode_tourl(navtable[1][n],'#tail'+filename); end; if (n=NavPrevTail) and assigned(localnavtable[navprev]) then begin fn:=localnavtable[navprev].Filename; if assigned(navtable[0][n]) then setnode_tourl(navtable[0][n],fn+'#'+fn); if assigned(navtable[1][n]) then setnode_tourl(navtable[1][n],fn+'#'+fn); end; if n=navfront then begin if assigned(navtable[0][n]) then setnode_tourl(navtable[0][n],filename); if assigned(navtable[1][n]) then setnode_tourl(navtable[1][n],filename); end; end; end; procedure THtmlDocFile.read(filenamewithpath,fn: string); begin ReadHtmlFile(dom,filenamewithpath); filename:=fn; findtitle; parsetitle; scan_nav_table(true,navtable[0]); scan_nav_table(false,navtable[1]); end; function THtmlDocFile.scantablenode(root: tdomnode;var resnode:Tdomnode):TNavbutton; var children: TDOMNodeList; i : integer; j : TNavButton; node : TDomNode; s : domstring; begin result:=NavNone; children:=root.ChildNodes; {$ifdef printattributes} writeln('root:', root.nodevalue); printattributes(root,''); {$endif} for i:= 0 to children.Length- 1 do begin node:=children.Item[i]; s:=node.nodevalue; if s='' then begin result:=scantablenode(node,resnode); if result<>navnone then begin exit(result); end; end; {$ifdef printattributes} writeln('navchild:',node.nodename,' ',s,' * '); writeln('attr:'); printattributes(node,''); printchildren(node); {$endif} j:=low(NavButtonCaptions); {$R-} while (j<=high(NavButtonCaptions)) and (navbuttoncaptions[j]<>s) do inc(j); if j<=high(navbuttoncaptions) then begin resnode:=node; exit(j); end; {$R+} end; end; procedure THtmlDocFile.scan_nav_table(scanforward:boolean;var navtab:TNavArray); var resnode,divnode,pnode,node :TDomNode; Attributes:TDOMNamedNodeMap; children:TDOMNodeList; i : integer; but : tnavbutton; begin if scanforward then divnode:=searchtag(dom.body,'div') else divnode:=searchtagback(dom.body,'div'); if assigned(divnode) then begin //printchildren(divnode); pnode:=searchtag(divnode,'p'); if assigned(pnode) then begin {$ifdef sdebugoutput} writeln(title,' '); {$endif} children:=pnode.ChildNodes; for i:= 0 to children.Length- 1 do begin node:=children.Item[i]; if node.NodeName='a' then begin resnode:=nil; but:=scantablenode(node,resnode); if but<>navnone then begin {$ifdef printchildren} writeln('found',but,' ',resnode.nodename,' ',resnode.nodevalue); {$endif} navtab[but]:=node; end; end; {$ifdef printchildren} write(' "',children.Item[i].NodeName,'" ',children.Item[i].Nodevalue,' - ' ); writeln; {$endif} end; end; {$ifdef printattributes} printattributes(pnode,title); {$endif} end; end; function THtmlDocFile.walk(prev,up: THtmlDocFile): THtmlDocFile; var cur : THtmlDocFile; i : Integer; begin localnavtable[NavUp]:=up; localnavtable[Navprev]:=prev; prev:=self; up:=self; for i := 0 to children.Count - 1 do begin cur:=THtmlDocFile(children.Objects[i]); prev:=cur.walk(prev,up) end; result:=prev; end; function THtmlDocFile.walkback(next: THtmlDocFile): THtmlDocFile; var cur : THtmlDocFile; i : Integer; begin for i := children.Count - 1 downto 0 do begin cur:=THtmlDocFile(children.Objects[i]); next:=cur.walkback(next) end; localnavtable[Navnext]:=next; result:=self; end; { TIndexes } constructor TIndexes.create; var i:integer; begin htmls:=TStringList.Create; htmls.sorted:=true; for i := 0 to high(indexes) do begin indexes[i]:=Tstringlist.Create; indexes[i].Sorted:=true; end; end; procedure TIndexes.linkfiles; var i,j : integer; sect,sect2,chap : THtmlDocFile; s : string; begin // collection chapters is zeropadded, rest not. // all "children" must be zero padded, since it is final ordering. for i:= 0 to sections.Count - 1 do begin sect:=THtmlDocFile(sections.Objects[i]); s:=sect.padnumbers[0]; j:=chapters.IndexOf(s); if j<>-1 then begin chap:=THtmlDocFile(chapters.Objects[j]); chap.children.AddObject(sect.padnumbers[1],sect); {$ifdef debugoutput} writeln('linking ',sect.redtitle,' to ',chap.redtitle); {$endif} end else writeln(sect.redtitle,' not found'); end; {$ifdef debugoutput} writeln('finished linking sections'); {$endif} for i:= 0 to subsections.Count - 1 do begin sect:=THtmlDocFile(subsections.Objects[i]); s:=sect.padnumbers[0]; j:=chapters.IndexOf(s); if j<>-1 then begin chap:=THtmlDocFile(chapters.Objects[j]); s:=sect.padnumbers[1]; j:=chap.children.IndexOf(s); if j<>-1 then begin sect2:=THtmlDocFile(chap.children.Objects[j]); sect2.children.addobject(sect.padnumbers[2],sect); {$ifdef debugoutput} writeln('linking ',sect.redtitle,' to ',sect2.redtitle); {$endif} end else writeln('Section for ',sect.redtitle,' not found'); end else writeln('Chapter for ',sect.redtitle,' not found'); end; end; procedure TIndexes.patch; var i : integer; begin for i := 0 to htmls.Count - 1 do THtmlDocFile(htmls.Objects[i]).patch; end; procedure TIndexes.readfiles(basedir:string); // load files, and sort them into an index of node types based on filename. var d : TSearchRec; v : THtmlDocFile; redname : string; begin if basedir<>'' then basedir:=IncludeTrailingPathDelimiter(basedir); if findfirst(basedir+'*.html',faanyfile,d)=0 then begin repeat v:=THtmlDocFile.create; v.read(basedir+d.Name,d.name); {$ifdef debugoutput} Writeln(' ',d.name); {$endif} delete(d.Name,1,length(prefix)); redname:=v.redtitle; // redtitle is paragraph title minus appendix/chapter etc. if ansiStartsText('ch',d.Name) then begin writeln('chap:',redname); chapters.AddObject(padzero(redName),v) // must be sorted from the start, so padded. end else if ansiStartsText('se',d.Name) then sections.AddObject(redName,v) else if ansiStartsText('su',d.Name) then subsections.AddObject(redName,v) else if ansiStartsText('ap',d.Name) then chapters.AddObject(padzero(redName),v) // treat appendix as chapter. "A" sorts after numbers else if ansiStartsText('li',d.Name) then toc.AddObject(redName,v) else rest.AddObject(redName,v); htmls.addobject(d.name,v); until findnext(d)<>0; findclose(d); end; end; procedure TIndexes.walk; // walk all chapters recursively and generate a prev, up nodes. var i : integer; up,prev: THtmlDocFile; cur : THtmlDocFile; begin prev:=nil; up:=nil; // should init to index or so. for i := 0 to chapters.Count - 1 do begin cur:=THtmlDocFile(chapters.Objects[i]); prev:=cur.walk(prev,up); end; end; procedure TIndexes.walkback; // walk all chapters backwards recursively and generate a next nodes. var i : integer; next: THtmlDocFile; cur : THtmlDocFile; begin next:=nil; for i := chapters.Count - 1 downto 0 do begin cur:=THtmlDocFile(chapters.Objects[i]); next:=cur.walkback(next); end; end; procedure TIndexes.writehtmls(basedir: string); var I : integer; sx :THTMLDocument; fnhtml:string; begin for i:=0 to htmls.count-1 do begin fnhtml:=basedir+pathdelim+prefix+htmls[i]; sx:=THtmlDocFile(htmls.objects[i]).dom; writehtml(sx,fnhtml); end; end; end.