[mod] update to pdfbox 3.0.5

This commit is contained in:
Andy Bunce 2025-05-30 18:08:32 +01:00
parent 196ffa7526
commit a3bef9d6f4
7 changed files with 54 additions and 61 deletions

View file

@ -1,10 +1,10 @@
xquery version '3.1';
(:~
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
A BaseX 10.7+ interface to pdfbox 3.0 https://pdfbox.apache.org/ ,
requires pdfbox jars on classpath, i.e. in custom or xar
tested with pdfbox-app-3.0.4.jar
@see download https://pdfbox.apache.org/download.cgi
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.4/
tested with pdfbox-app-3.0.5.jar
@see <a href="https://pdfbox.apache.org/download.cgi">download</a>
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.5/
@author Andy Bunce 2025
:)
@ -16,7 +16,7 @@ declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument";
declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog";
declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels";
declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor";
declare namespace PDPage ="org.apache.pdfbox.pdmodel.PDPage";
declare namespace PDPage ="java:org.apache.pdfbox.pdmodel.PDPage";
declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree";
declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline";
declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation";
@ -54,8 +54,8 @@ pdfbox:open($pdfsrc, map{})
};
(:~ open pdf from file/url/binary, opts may have password , returns pdf object
@param $pdfsrc a fetchable url or a xs:base64Binary
@param $opts map {"password":}
@param $pdfsrc a fetchable url or filepath, or xs:base64Binary item
@param $opts options map, optionally containing a password: map {"password":}
:)
declare function pdfbox:open($pdfsrc as item(), $opts as map(*))
as item(){
@ -75,7 +75,7 @@ as item(){
}
};
(:~ the version of the PDF specification used by $pdf e.g "1.4"
(:~ The version of the PDF specification used by $pdf e.g "1.4"
returned as string to avoid float rounding issues
:)
declare function pdfbox:specification($pdf as item())
@ -83,13 +83,13 @@ as xs:string{
PDDocument:getVersion($pdf)=>xs:decimal()=>round(4)=>string()
};
(:~ save pdf $pdf to filesystem at $savepath , returns $savepath :)
(:~ Save pdf $pdf to the filesystem at $savepath.
 @param $pdf the pdf object to save
 @param $savepath target path, wrapped with File:new() before being handed to PDDocument:save
 @return $savepath (the save call is presumed to add nothing to the result sequence - confirm)
:)
declare function pdfbox:save($pdf as item(),$savepath as xs:string)
as xs:string{
PDDocument:save($pdf, File:new($savepath)),$savepath
};
(:~ $pdf as xs:base64Binary :)
(:~ Create binary representation of $pdf as xs:base64Binary :)
declare function pdfbox:binary($pdf as item())
as xs:base64Binary{
let $bytes:=Q{java:java.io.ByteArrayOutputStream}new()
@ -98,7 +98,7 @@ as xs:base64Binary{
=>convert:integers-to-base64()
};
(:~ release references to $pdf:)
(:~ Release any resources related to $pdf:)
declare function pdfbox:close($pdf as item())
as empty-sequence(){
(# db:wrapjava void #) {
@ -106,15 +106,15 @@ as empty-sequence(){
}
};
(:~ number of pages in PDF:)
declare function pdfbox:page-count($pdf as item())
(:~ Number of pages in PDF.
 @param $pdf the pdf object to inspect
 @return the result of PDDocument:getNumberOfPages for $pdf
:)
declare function pdfbox:number-of-pages($pdf as item())
as xs:integer{
PDDocument:getNumberOfPages($pdf)
};
(:~ pdf page as image (zero is cover)
(:~ Pdf page as image (zero is cover)
options.format="bmp jpg png gif" etc (default "jpg"), options.scale: 1 = 72 dpi (default 1) :)
declare function pdfbox:page-image($pdf as item(),$pageNo as xs:integer,$options as map(*))
declare function pdfbox:page-render($pdf as item(),$pageNo as xs:integer,$options as map(*))
as xs:base64Binary{
let $options:=map:merge(($options,map{"format":"jpg","scale":1}))
let $bufferedImage:=PDFRenderer:new($pdf)=>PDFRenderer:renderImage($pageNo,$options?scale)
@ -130,7 +130,7 @@ as xs:base64Binary{
values are sequences of functions to get property from $pdf object
:)
declare %private variable $pdfbox:property-map:=map{
"pageCount": pdfbox:page-count#1,
"pageCount": pdfbox:number-of-pages#1,
"hasOutline": pdfbox:hasOutline#1,
@ -166,7 +166,7 @@ declare %private variable $pdfbox:property-map:=map{
};
(:~ Known property names, sorted :)
declare function pdfbox:defined-properties()
declare function pdfbox:property-names()
as xs:string*{
$pdfbox:property-map=>map:keys()=>sort()
};
@ -275,7 +275,7 @@ as map(*){
)
};
(:~ outline as xml :)
(:~ PDF outline in xml format :)
declare function pdfbox:outline-xml($pdf as item())
as element(outline)?{
let $outline:=pdfbox:outline($pdf)
@ -294,8 +294,8 @@ as element(bookmark)*
</bookmark>
};
(:~ return bookmark info for children of $outlineItem
@return map like{index:,title:,hasChildren:}
(:~ return bookmark info for $bookmark
@return map{index:..,title:..,hasChildren:..}
:)
declare %private function pdfbox:bookmark($bookmark as item(),$pdf as item())
as map(*)
@ -321,8 +321,8 @@ as item()?
=>PDPageTree:indexOf($page)
};
(:~ new PDF doc from 1 based page range as xs:base64Binary :)
declare function pdfbox:extract($pdf as item(),
(:~ Return a new PDF document, extracted using a 1-based page range, as xs:base64Binary :)
declare function pdfbox:extract-range($pdf as item(),
$start as xs:integer,$end as xs:integer)
as xs:base64Binary
{