1
0
Fork 0

[mod] report

This commit is contained in:
Andy Bunce 2025-02-15 16:24:09 +00:00
parent 4ea01764f9
commit ce70157303
2 changed files with 64 additions and 29 deletions

View file

@ -1,6 +1,6 @@
{ {
"name": "pdfbox", "name": "pdfbox",
"version": "0.1.6", "version": "0.2.1",
"description": "A BaseX interface to Apache Pdfbox version 3", "description": "A BaseX interface to Apache Pdfbox version 3",
"main": "src/Pdfbox3.xqm", "main": "src/Pdfbox3.xqm",
"homepage": "https://github.com/npm/example#readme", "homepage": "https://github.com/npm/example#readme",

View file

@ -1,9 +1,11 @@
xquery version '3.1'; xquery version '3.1';
(:~ (:~
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library, pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
requires pdfbox jar on classpath, tested with pdfbox-app-3.0.4.jar requires pdfbox jars on classpath, i.e. in custom or xar
tested with pdfbox-app-3.0.4.jar
@see download https://pdfbox.apache.org/download.cgi @see download https://pdfbox.apache.org/download.cgi
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.4/ @javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.4/
@author Andy Bunce 2025
:) :)
module namespace pdfbox="org.expkg_zone58.Pdfbox3"; module namespace pdfbox="org.expkg_zone58.Pdfbox3";
@ -91,38 +93,71 @@ as xs:base64Binary{
}; };
declare variable $pdfbox:doc-info:=map{ (:~ property access map
"title": PDDocumentInformation:getTitle#1, keys are property names,
"author": PDDocumentInformation:getAuthor#1, values are sequences of functions to get property from $pdf object
"creator": PDDocumentInformation:getCreator#1, :)
"producer": PDDocumentInformation:getProducer#1, declare %private variable $pdfbox:doc-info:=map{
"subject": PDDocumentInformation:getSubject#1, "pages": pdfbox:page-count#1,
"keywords": PDDocumentInformation:getKeywords#1,
"creationdate": function($i){pdfbox:gregToISO(PDDocumentInformation:getCreationDate($i))}, "hasOutline": pdfbox:hasOutline#1,
"modificationdate": function($i){pdfbox:gregToISO(PDDocumentInformation:getModificationDate($i))}
"hasLabels": pdfbox:hasLabels#1,
"specification":pdfbox:specification#1,
"title": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getTitle#1) ,
"author": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getAuthor#1 ),
"creator": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getCreator#1),
"producer": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getProducer#1),
"subject": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getSubject#1),
"keywords": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getKeywords#1),
"creationDate": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getCreationDate#1,
pdfbox:gregToISO#1),
"modificationDate": (PDDocument:getDocumentInformation#1,
PDDocumentInformation:getModificationDate#1,
pdfbox:gregToISO#1)
}; };
(:~ map with document metadata :) (:~ return value of $property for $pdf :)
declare function pdfbox:metadata($pdf as item()) declare function pdfbox:property($pdf as item(),$property as xs:string)
as map(*){ as item()*{
let $info:=PDDocument:getDocumentInformation($pdf) let $fns:= $pdfbox:doc-info($property)
return map:for-each($pdfbox:doc-info, return if(exists($fns))
function($k,$v){map:entry($k,$pdfbox:doc-info($k)($info))}) then fold-left($fns,
=>map:merge() $pdf,
function($result,$this as function(*)){$this($result)})
else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined."))
}; };
(:~ summary info as map for $pdfpath :) (:~ summary CSV style info for all properties for $pdfpaths :)
declare function pdfbox:report($pdfpath as xs:string) declare function pdfbox:report($pdfpaths as xs:string*)
as map(*){ as map(*){
let $pdf:=pdfbox:open-file($pdfpath) pdfbox:report($pdfpaths,map:keys($pdfbox:doc-info))
return (map{ };
"file": $pdfpath,
"pages": pdfbox:page-count($pdf), (:~ summary CSV style info for named properties for $pdfpaths :)
"hasOutline": pdfbox:hasOutline($pdf), declare function pdfbox:report($pdfpaths as xs:string*, $properties as xs:string*)
"hasLabels": pdfbox:hasLabels($pdf), as map(*){
"specification":pdfbox:specification($pdf) map{"names": array{$properties},
},pdfbox:metadata($pdf)
)=>map:merge() "records": for $pdf in $pdfpaths!pdfbox:open-file(.)
return array{$properties!pdfbox:property($pdf, .)}
}
}; };
(:~ true if $pdf has an outline :) (:~ true if $pdf has an outline :)