1
0

[mod] outline

This commit is contained in:
Andy Bunce 2024-02-26 16:27:01 +00:00
parent d0e746a45a
commit 3ba5b52227
2 changed files with 44 additions and 17 deletions

View File

@ -41,10 +41,11 @@ declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.docu
declare namespace File ="java:java.io.File"; declare namespace File ="java:java.io.File";
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile"; declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
(:~ open pdf, returns handle :)
declare function pdfbox:open($pdfpath as xs:string){ declare function pdfbox:open($pdfpath as xs:string){
Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath)) Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
}; };
(:~ save pdf $doc to $savepath , returns $savepath :)
declare function pdfbox:save($doc,$savepath as xs:string) declare function pdfbox:save($doc,$savepath as xs:string)
as xs:string{ as xs:string{
PDDocument:save($doc,File:new($savepath)),$savepath PDDocument:save($doc,File:new($savepath)),$savepath
@ -70,28 +71,53 @@ as xs:integer{
} }
:) :)
declare function pdfbox:siblings($acc as item()*,$outlineItem ,$doc as item()) declare function pdfbox:siblings($acc as item()*,$outlineItem ,$doc as item())
{ as map(*)*{
(# db:wrapjava all #) { if(empty($outlineItem))
if(empty($outlineItem))
then $acc then $acc
else let $next:= PDOutlineItem:getNextSibling($outlineItem)=>trace("next: ") else let $next:= PDOutlineItem:getNextSibling($outlineItem)=>trace("next: ")
return pdfbox:siblings($acc,pdfbox:bookmark($outlineItem ,$doc), return pdfbox:siblings($acc,pdfbox:bookmark($outlineItem ,$doc),
$next $next
) )
}
};
(: collect bookmark info for $outlineItem and each of its following siblings; the "list" entry of the returned map holds the bookmarks :)
declare function pdfbox:sibs($outlineItem,$doc )
as map(*)*
{
hof:until(
function($output) { empty($output?this) },
function($input ) { map{
"list":($input?list,pdfbox:bookmark($input?this,$doc)),
"this": PDOutlineItem:getNextSibling($input?this)}
},
map{"list":(),"this":$outlineItem}
)
}; };
declare function pdfbox:outline($doc as item()) declare function pdfbox:outline($doc as item())
as item()*{ as map(*)*{
let $bookmark:=(# db:wrapjava all #) { (# db:wrapjava some #) {
let $bookmark:=
PDDocument:getDocumentCatalog($doc) PDDocument:getDocumentCatalog($doc)
=>PDDocumentCatalog:getDocumentOutline() =>PDDocumentCatalog:getDocumentOutline()
=>PDOutlineItem:getFirstChild()=>trace("cur") =>PDOutlineItem:getFirstChild()=>trace("cur")
}
(: return hof:until(empty#1,pdfbox:outx(?,$doc),()) :)
let $bk:=pdfbox:siblings((),$bookmark ,$doc)
return $bk (: let $bk:=pdfbox:siblings((),$bookmark ,$doc) :)
let $bk:=pdfbox:sibs($bookmark ,$doc)?list
(: let $bookmark := PDOutlineItem:getNextSibling($bookmark)
let $bk := ($bk, pdfbox:bookmark($bookmark, $doc))
let $bookmark := PDOutlineItem:getNextSibling($bookmark)
let $bk := ($bk, pdfbox:bookmark($bookmark, $doc))
let $bookmark := PDOutlineItem:getNextSibling($bookmark)
let $bk := ($bk, pdfbox:bookmark($bookmark, $doc))
let $bookmark := PDOutlineItem:getNextSibling($bookmark)
let $bk := ($bk, pdfbox:bookmark($bookmark, $doc))
let $bookmark := PDOutlineItem:getNextSibling($bookmark) => trace("EMP")
let $bk := ($bk, if(exists($bookmark)) then pdfbox:bookmark($bookmark, $doc)) :)
return $bk
}
}; };
declare function pdfbox:bookmark($bookmark as item(),$doc as item()) declare function pdfbox:bookmark($bookmark as item(),$doc as item())
@ -100,7 +126,8 @@ as map(*){
(: return hof:until(empty#1,pdfbox:outx(?,$doc),()) :) (: return hof:until(empty#1,pdfbox:outx(?,$doc),()) :)
return map{ return map{
"index": pdfbox:pageIndex($currentPage,$doc), "index": pdfbox:pageIndex($currentPage,$doc),
"title": PDOutlineItem:getTitle($bookmark) "title": PDOutlineItem:getTitle($bookmark),
"hasChildren": PDOutlineItem:hasChildren($bookmark)
} }
}; };
@ -110,14 +137,14 @@ declare function pdfbox:outx($page,$document){
return $pageNumber return $pageNumber
}; };
(: pageIndex of $page in $document :) (:~ pageIndex of $page in $document :)
declare function pdfbox:pageIndex( declare function pdfbox:pageIndex(
$page (: as java:org.apache.pdfbox.pdmodel.PDPage :), $page (: as java:org.apache.pdfbox.pdmodel.PDPage :),
$document) $document)
{ {
PDDocument:getDocumentCatalog($document) PDDocument:getDocumentCatalog($document)
=>PDDocumentCatalog:getPages() =>PDDocumentCatalog:getPages()
=>PDPageTree:indexOf($page) =>PDPageTree:indexOf($page)
}; };

View File

@ -22,4 +22,4 @@ declare variable $PDF:= $samples?climate=>file:resolve-path($base);
let $doc:=pdfbox:open($PDF) let $doc:=pdfbox:open($PDF)
return pdfbox:outline($doc) return pdfbox:outline($doc)
(: return pdfbox:extract($doc,"c:\tmp\junk2.pdf",50,100) :) (: return pdfbox:extract($doc,"c:\tmp\junk3.pdf",1,pdfbox:page-count($doc)) :)