[mod] docs

This commit is contained in:
Andy Bunce 2025-06-23 20:21:26 +01:00
parent 6dca7f3887
commit f96c64286d
16 changed files with 252 additions and 296 deletions

View file

@ -35,7 +35,7 @@ declare namespace rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace RandomAccessReadBuffer="java:org.apache.pdfbox.io.RandomAccessReadBuffer";
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
declare namespace PDRectangle="org.apache.pdfbox.pdmodel.common.PDRectangle";
declare namespace PDRectangle="java:org.apache.pdfbox.pdmodel.common.PDRectangle";
declare namespace File ="java:java.io.File";
@ -58,11 +58,6 @@ as item()*{
};
(:~
 : Open a PDF using default options.
 : Shorthand for the two-argument pdfbox:open, called with an empty options map.
 : @param $pdfsrc source of the PDF, passed unchanged to the two-argument form
 : @return the pdf object produced by the two-argument pdfbox:open
 :)
declare function pdfbox:open($pdfsrc as item())
as item(){
  $pdfsrc => pdfbox:open(map{})
};
(:~ open pdf from file/url/binary, opts may have password , returns pdf object
@param $pdfsrc a fetchable url or filepath, or xs:base64Binary item
@ -87,6 +82,13 @@ as item(){
}
};
(:~
 : Open a PDF from a location using default options.
 : Shorthand for the two-argument pdfbox:open, called with an empty options map.
 : @param $pdfsrc source of the PDF, passed unchanged to the two-argument form
 : @return the pdf object produced by the two-argument pdfbox:open
 :)
declare function pdfbox:open($pdfsrc as item())
as item(){
  $pdfsrc => pdfbox:open(map{})
};
(:~ The version of the PDF specification used by $pdf, e.g. "1.4".
Returned as a string to avoid float rounding issues.
:)
@ -95,13 +97,13 @@ as xs:string{
PDDocument:getVersion($pdf)=>xs:decimal()=>round(4)=>string()
};
(:~ Save pdf $pdf to filesystem at $savepath , returns $savepath :)
(:~
 : Save pdf <code>$pdf</code> to the filesystem at <code>$savepath</code>.
 : @param $pdf the pdf object to write
 : @param $savepath filesystem path of the file to write
 : @return <code>$savepath</code>, so the call can be chained
 :)
declare function pdfbox:pdf-save($pdf as item(),$savepath as xs:string)
as xs:string{
  let $target := File:new($savepath)
  return (
    (: the save call comes first in the sequence; the path follows as the result :)
    PDDocument:save($pdf, $target),
    $savepath
  )
};
(:~ Create binary representation of $pdf object as xs:base64Binary :)
(:~ Create binary representation (xs:base64Binary) of <code>$pdf</code> object :)
declare function pdfbox:binary($pdf as item())
as xs:base64Binary{
let $bytes:=Q{java:java.io.ByteArrayOutputStream}new()
@ -198,12 +200,6 @@ as item()*{
else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined."))
};
(:~
 : Summary CSV style info covering all properties for the PDFs in $pdfpaths.
 : Delegates to the two-argument pdfbox:report, supplying every name
 : returned by pdfbox:property-names().
 : @param $pdfpaths paths of the PDFs to report on
 : @return the map produced by the two-argument pdfbox:report
 :)
declare function pdfbox:report($pdfpaths as xs:string*)
as map(*){
  let $all-properties := pdfbox:property-names()
  return pdfbox:report($pdfpaths, $all-properties)
};
(:~ summary CSV style info for named $properties for PDFs in $pdfpaths
@see https://docs.basex.org/main/CSV_Functions#xquery
@ -233,6 +229,13 @@ as map(*){
}
};
(:~
 : Summary CSV style info covering all properties for the PDFs in $pdfpaths.
 : Delegates to the two-argument pdfbox:report, supplying every name
 : returned by pdfbox:property-names().
 : @param $pdfpaths paths of the PDFs to report on
 : @return the map produced by the two-argument pdfbox:report
 :)
declare function pdfbox:report($pdfpaths as xs:string*)
as map(*){
  let $all-properties := pdfbox:property-names()
  return pdfbox:report($pdfpaths, $all-properties)
};
(:~ Convenience function to save report() data to file :)
declare function pdfbox:report-save($data as map(*),$dest as xs:string)
as empty-sequence(){
@ -306,9 +309,14 @@ as map(*){
map{"list":(),"this":$outlineItem},
function($input,$pos ) {
let $bk:= pdfbox:bookmark($input?this,$pdf)
let $bk:= if($bk?hasChildren)
then let $kids:=pdfbox:outline($pdf,PDOutlineItem:getFirstChild($input?this))
let $bookmark:=$input?this
let $bk:=map{
"index": PDOutlineItem:findDestinationPage($bookmark,$pdf)=>pdfbox:find-page($pdf),
"title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}
}
let $bk:= if(PDOutlineItem:hasChildren($bookmark))
then let $kids:=pdfbox:outline($pdf,PDOutlineItem:getFirstChild($bookmark))
return map:merge(($bk,map:entry("children",$kids)))
else $bk
return map{
@ -339,21 +347,6 @@ as element(bookmark)*
</bookmark>
};
(:~
 : Collect the basic details of one outline item.
 : @param $bookmark the outline item to describe
 : @param $pdf the pdf object that $bookmark belongs to
 : @return map{index:..,title:..,hasChildren:..} where index is the result of
 :         pdfbox:find-page for the destination page of the bookmark
 :)
declare %private function pdfbox:bookmark($bookmark as item(),$pdf as item())
as map(*)
{
  let $destination := PDOutlineItem:findDestinationPage($bookmark,$pdf)
  let $page-index := pdfbox:find-page($destination,$pdf)
  (: title is read inside the db:checkstrings pragma; a translate() step that
     stripped an unprintable character used to follow it and is disabled :)
  let $title := (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}
  let $has-children := PDOutlineItem:hasChildren($bookmark)
  return map{
    "index": $page-index,
    "title": $title,
    "hasChildren": $has-children
  }
};
(:~ pageIndex of $page in $pdf :)
declare function pdfbox:find-page(
$page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),