[mod] report
This commit is contained in:
parent
4ea01764f9
commit
ce70157303
2 changed files with 64 additions and 29 deletions
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "pdfbox",
|
"name": "pdfbox",
|
||||||
"version": "0.1.6",
|
"version": "0.2.1",
|
||||||
"description": "A BaseX interface to Apache Pdfbox version 3",
|
"description": "A BaseX interface to Apache Pdfbox version 3",
|
||||||
"main": "src/Pdfbox3.xqm",
|
"main": "src/Pdfbox3.xqm",
|
||||||
"homepage": "https://github.com/npm/example#readme",
|
"homepage": "https://github.com/npm/example#readme",
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
xquery version '3.1';
|
xquery version '3.1';
|
||||||
(:~
|
(:~
|
||||||
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
|
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
|
||||||
requires pdfbox jar on classpath, tested with pdfbox-app-3.0.4.jar
|
requires the pdfbox jars on the classpath, e.g. in the BaseX lib/custom folder or bundled in the xar package
|
||||||
|
tested with pdfbox-app-3.0.4.jar
|
||||||
@see download https://pdfbox.apache.org/download.cgi
|
@see download https://pdfbox.apache.org/download.cgi
|
||||||
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.4/
|
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.4/
|
||||||
|
@author Andy Bunce 2025
|
||||||
:)
|
:)
|
||||||
|
|
||||||
module namespace pdfbox="org.expkg_zone58.Pdfbox3";
|
module namespace pdfbox="org.expkg_zone58.Pdfbox3";
|
||||||
|
@ -91,38 +93,71 @@ as xs:base64Binary{
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
declare variable $pdfbox:doc-info:=map{
|
(:~ property access map
|
||||||
"title": PDDocumentInformation:getTitle#1,
|
keys are property names,
|
||||||
"author": PDDocumentInformation:getAuthor#1,
|
values are sequences of functions, applied in order starting from the $pdf object, each receiving the previous result, to get the property value
|
||||||
"creator": PDDocumentInformation:getCreator#1,
|
:)
|
||||||
"producer": PDDocumentInformation:getProducer#1,
|
declare %private variable $pdfbox:doc-info:=map{
|
||||||
"subject": PDDocumentInformation:getSubject#1,
|
"pages": pdfbox:page-count#1,
|
||||||
"keywords": PDDocumentInformation:getKeywords#1,
|
|
||||||
"creationdate": function($i){pdfbox:gregToISO(PDDocumentInformation:getCreationDate($i))},
|
"hasOutline": pdfbox:hasOutline#1,
|
||||||
"modificationdate": function($i){pdfbox:gregToISO(PDDocumentInformation:getModificationDate($i))}
|
|
||||||
|
"hasLabels": pdfbox:hasLabels#1,
|
||||||
|
|
||||||
|
"specification":pdfbox:specification#1,
|
||||||
|
|
||||||
|
"title": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getTitle#1) ,
|
||||||
|
|
||||||
|
"author": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getAuthor#1 ),
|
||||||
|
|
||||||
|
"creator": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getCreator#1),
|
||||||
|
|
||||||
|
"producer": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getProducer#1),
|
||||||
|
|
||||||
|
"subject": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getSubject#1),
|
||||||
|
|
||||||
|
"keywords": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getKeywords#1),
|
||||||
|
|
||||||
|
"creationDate": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getCreationDate#1,
|
||||||
|
pdfbox:gregToISO#1),
|
||||||
|
|
||||||
|
"modificationDate": (PDDocument:getDocumentInformation#1,
|
||||||
|
PDDocumentInformation:getModificationDate#1,
|
||||||
|
pdfbox:gregToISO#1)
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ map with document metadata :)
|
(:~ return the value of $property for $pdf; raises error pdfbox:property if $property is not defined :)
|
||||||
declare function pdfbox:metadata($pdf as item())
|
declare function pdfbox:property($pdf as item(),$property as xs:string)
|
||||||
as map(*){
|
as item()*{
|
||||||
let $info:=PDDocument:getDocumentInformation($pdf)
|
let $fns:= $pdfbox:doc-info($property)
|
||||||
return map:for-each($pdfbox:doc-info,
|
return if(exists($fns))
|
||||||
function($k,$v){map:entry($k,$pdfbox:doc-info($k)($info))})
|
then fold-left($fns,
|
||||||
=>map:merge()
|
$pdf,
|
||||||
|
function($result,$this as function(*)){$this($result)})
|
||||||
|
else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined."))
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ summary info as map for $pdfpath :)
|
(:~ CSV-style summary of all defined properties for $pdfpaths: a map with "names" (property names) and "records" (one array of values per PDF) :)
|
||||||
declare function pdfbox:report($pdfpath as xs:string)
|
declare function pdfbox:report($pdfpaths as xs:string*)
|
||||||
as map(*){
|
as map(*){
|
||||||
let $pdf:=pdfbox:open-file($pdfpath)
|
pdfbox:report($pdfpaths,map:keys($pdfbox:doc-info))
|
||||||
return (map{
|
};
|
||||||
"file": $pdfpath,
|
|
||||||
"pages": pdfbox:page-count($pdf),
|
(:~ CSV-style summary of the named $properties for $pdfpaths: a map with "names" (the property names) and "records" (one array of values per PDF) :)
|
||||||
"hasOutline": pdfbox:hasOutline($pdf),
|
declare function pdfbox:report($pdfpaths as xs:string*, $properties as xs:string*)
|
||||||
"hasLabels": pdfbox:hasLabels($pdf),
|
as map(*){
|
||||||
"specification":pdfbox:specification($pdf)
|
map{"names": array{$properties},
|
||||||
},pdfbox:metadata($pdf)
|
|
||||||
)=>map:merge()
|
"records": for $pdf in $pdfpaths!pdfbox:open-file(.)
|
||||||
|
return array{$properties!pdfbox:property($pdf, .)}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ true if $pdf has an outline :)
|
(:~ true if $pdf has an outline :)
|
||||||
|
|
Loading…
Add table
Reference in a new issue