diff --git a/.vscode/settings.json b/.vscode/settings.json index 13498c2..d566f68 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,6 @@ { "basexTools.xquery.profile": "basex-10", "basexTools.xquery.showHovers": false, + "basexTools.xquery.executionDefault": "basexclient", } \ No newline at end of file diff --git a/.xqdoca b/.xqdoca index df58509..fd65659 100644 --- a/.xqdoca +++ b/.xqdoca @@ -1,4 +1,4 @@ - jars/ + src/ docs/xqdoc/ \ No newline at end of file diff --git a/docs/pdf.xqbk b/docs/pdf.xqbk new file mode 100644 index 0000000..76e2a23 --- /dev/null +++ b/docs/pdf.xqbk @@ -0,0 +1 @@ +{"cells":[{"kind":2,"language":"xquery","value":"import module namespace pdfbox=\"org.expkg_zone58.Pdfbox3\";\r\nlet $a:=pdfbox:open(\"C:\\Users\\mrwhe\\git\\expkg-zone58\\pdfbox\\data\\1e\\gpg-book\\2-5-1\\B4541C-TRD\\255894---Book_File-Web_PDF_9798400668005_486272.pdf\")\r\nreturn pdfbox:labels($a)"}]} \ No newline at end of file diff --git a/package.json b/package.json index 58a8beb..2aebc76 100644 --- a/package.json +++ b/package.json @@ -22,11 +22,11 @@ "expkg_zone58": { "namespace": "org.expkg_zone58.Pdfbox3", "main-class": "org.apache.pdfbox.pdmodel.PDDocument", - "maven": [ - "org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar", - "org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar", - "org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar", - "commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar" + "maven2": [ + "org.apache.pdfbox:pdfbox:3.0.4", + "org.apache.pdfbox:pdfbox-io:3.0.4", + "org.apache.pdfbox:fontbox:3.0.4", + "commons-logging:commons-logging:1.3.4" ] } diff --git a/scripts/build.xqm b/scripts/build.xqm index a4e40d3..34e81a0 100644 --- a/scripts/build.xqm +++ b/scripts/build.xqm @@ -94,7 +94,7 @@ as xs:string{ declare function build:xar-create() as xs:base64Binary{ - let $_:=build:maven-download($build:PKG?expkg_zone58?maven=>array:flatten(),$build:base || "jars/") + let 
$_:=build:maven-download($build:PKG?expkg_zone58?maven2=>array:flatten(),$build:base || "jars/") let $entries:= build:xar-add(map{},build:jars("content"),build:jars("download")!build:content(.)) =>build:xar-add("content/Pdfbox3.xqm",build:content("src/Pdfbox3.xqm")) @@ -124,27 +124,45 @@ as xs:string{ declare function build:jars($style as xs:string) as xs:string*{ -let $src:=$build:PKG?expkg_zone58?maven=>array:flatten() -let $names:= $src!replace(.,"^.*/","") +let $artifacts:=$build:PKG?expkg_zone58?maven2=>array:flatten() +let $names:= $artifacts!build:maven-slug(.)!file:name(.) return switch($style) case "name" return $names case "download" return $names!concat("jars/",.) case "content" return $names!concat("content/",.) -default return $src +default return $names }; (:~ download $files from $urls to $destdir:) declare variable $build:REPO as xs:string external :="https://repo1.maven.org/maven2/"; -declare function build:maven-download($urls as xs:string*,$destdir as xs:string) + +declare function build:maven-download($artifacts as xs:string*,$destdir as xs:string) as empty-sequence(){ file:create-dir($destdir), - for $f in $urls - let $dest:=$destdir || replace($f,"^.*/","") + for $id in $artifacts + let $slug:=build:maven-slug($id) + let $dest:=$destdir || file:name($slug) where not(file:exists($dest)) - return build:write-binary($dest, fetch:binary(resolve-uri($slug,$build:REPO) + return build:write-binary($dest, fetch:binary(resolve-uri($slug,$build:REPO) =>trace("Download: "))) }; +(:~ non-rooted url (path relative to a repository root) for maven artifact $artifact given as 'groupId:id:version', e.g. "org.apache.pdfbox:pdfbox:3.0.4" gives "org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar"; raises build:maven-slug unless $artifact is exactly three non-empty colon-separated parts (the pattern is anchored because fn:matches otherwise accepts any matching substring) :) +declare function build:maven-slug($artifact as xs:string) +as xs:string{ + + let $parts:=if(matches($artifact,'^[^:]+:[^:]+:[^:]+$')) + then tokenize($artifact,":") + else error(xs:QName('build:maven-slug'),"invalid format required 'groupId:id:version'") + + return ( + translate($parts[1],".","/"), + $parts[2], + $parts[3], + string-join(($parts[2] , "-" , $parts[3] , ".jar"),"") + )=>string-join("/") +}; + (:~ write-binary, creating dir if 
required :) declare function build:write-binary($dest as xs:string,$contents as xs:base64Binary?) as empty-sequence(){ diff --git a/src/Pdfbox3.xqm b/src/Pdfbox3.xqm index 081eb19..dafdf7f 100644 --- a/src/Pdfbox3.xqm +++ b/src/Pdfbox3.xqm @@ -98,7 +98,7 @@ as xs:base64Binary{ =>convert:integers-to-base64() }; -(: release references to $pdf:) +(:~ release references to $pdf:) declare function pdfbox:close($pdf as item()) as empty-sequence(){ (# db:wrapjava void #) { @@ -112,7 +112,7 @@ as xs:integer{ PDDocument:getNumberOfPages($pdf) }; -(:~ render of $pdf page to image +(:~ pdf page as image (zero is cover) options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) declare function pdfbox:page-image($pdf as item(),$pageNo as xs:integer,$options as map(*)) as xs:base64Binary{ @@ -284,6 +284,7 @@ as element(outline)?{ else () }; +(:~ recursive outline map to XML :) declare %private function pdfbox:bookmark-xml($outline as map(*)*) as element(bookmark)* { @@ -355,7 +356,7 @@ as xs:string{ return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$pdf)} }; -(:~ return size of $pageNo zero based :) +(:~ return size of $pageNo (zero is cover) :) declare function pdfbox:page-size($pdf as item(), $pageNo as xs:integer) as xs:string{ PDDocument:getPage($pdf, $pageNo) @@ -363,7 +364,7 @@ as xs:string{ =>PDRectangle:toString() }; -(:~ version of Apache Pdfbox in use e.g. "3.0.4" :) +(:~ version of Apache Pdfbox in use e.g. "3.0.4" :) declare function pdfbox:version() as xs:string{ Q{java:org.apache.pdfbox.util.Version}getVersion()