[fix] combined packaging

2025-01-24 11:10:24 +00:00 · 2025-01-24 11:10:24 +00:00 · 1fe7edc1f7
commit 1fe7edc1f7
parent 4bcfaefcc0
10 changed files with 362 additions and 10 deletions
--- a/.gitea/workflows/ci-basex.yaml
+++ b/.gitea/workflows/ci-basex.yaml
@ -10,17 +10,23 @@ on:

  
 jobs:
-  test:
-    runs-on:  basex-10.7
+ test:
+    runs-on: ubuntu-latest

    steps:
-    - name: Set up Node.js
-      uses: actions/setup-node@v4
-      with:
-       node-version: 18
-
    - name: Checkout repository
      uses: actions/checkout@v2

+    - name: Set up Java
+      uses: actions/setup-java@v2
+      # setup-java v2 and later fail when no `distribution` input is supplied
+      with: { distribution: 'temurin', java-version: '11' }
+
+    - name: Install BaseX
+      run: |
+        wget -q https://files.basex.org/releases/10.7/BaseX107.zip  # the library targets BaseX 10.7+
+        unzip -q BaseX107.zip && chmod +x basex/bin/basex           # the zip already contains a top-level basex/ folder
+
    - name: Run BaseX Tests
-      run:  'basex/bin/basex -t .'
+      run: |
+        ./basex/bin/basex -c"RUN tests/test.bxs"
--- a/docs/pdfbox.xqbk
+++ b/docs/pdfbox.xqbk
@ -1 +1 @@
-{"cells":[{"kind":1,"language":"markdown","value":"# PDFBox3 \r\nA BaseX 10+ interface to Apache PDFBox® library version 3 \r\n## Apache PDFBox® - A Java PDF Library\r\n\r\nThe Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.\r\nhttps://pdfbox.apache.org/"},{"kind":1,"language":"markdown","value":"It comes with the useful PDF debug tool `java -jar debugger-app-3.0.2.jar`"},{"kind":1,"language":"markdown","value":"## Set up XQuery context for following code..."},{"kind":2,"language":"xquery","value":"(:<:)(: XQuery Context :)\r\nimport module namespace pdfbox = \"urn:expkg-zone58:pdfbox3\" at \"../src/lib/pdfbox3.xqm\";\r\nimport module namespace bookpages = 'urn:bookpages'  at \"../src/lib/bookpages.xqm\";\r\nimport module namespace pdfscrape = 'urn:pdfscrape'  at \"../src/lib/pdfscrape.xqm\";\r\nimport module namespace config = 'urn:abc-clio:config' at 'C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO/lib/abc-config.xqm';\r\n\r\ndeclare variable $samples:= map{\r\n    \"climate\":  \"drop-01d\\set\\2-6-1\\A5579C_1\\271989---Book_File-Web_PDF_9798400627484_486728.pdf\",\r\n    \"women\":    \"drop-01d\\set\\2-6-1\\A6229C_1\\257334---Book_File-Web_PDF_9798216172628_486742.pdf\",\r\n    \"genocide\": \"drop1-pdf\\GR2967-TRD\\272791---Book_File-Web_PDF_9798400640216_486366.pdf\",\r\n    \"world\":    \"drop-01c\\gpg-book\\2-6\\A3506C-TRD\\256186---Book_File-Web_PDF_9798216038955_486148.pdf\"\r\n};\r\ndeclare variable $PDF:= (: $samples?women=>file:resolve-path($config:data) 
:)\r\n\"C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO\\data\\drop-01e\\set\\2-6-1\\A5690C_1\\257107---Book_File-Web_PDF_9798400691218_486731.pdf\";"},{"kind":1,"language":"markdown","value":" ## Check pdfbox version"},{"kind":2,"language":"xquery","value":"pdfbox:version()"},{"kind":1,"language":"markdown","value":"PDF specification version used by document"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:pdfVersion()"},{"kind":1,"language":"markdown","value":"# Page count for PDF"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:page-count()"},{"kind":1,"language":"markdown","value":"# save range to new pdf"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:extract(2,12,\"c:\\tmp\\a.pdf\")"},{"kind":1,"language":"markdown","value":"## Outline / bookmarks"},{"kind":1,"language":"markdown","value":"### sequence of maps"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:outline()"},{"kind":1,"language":"markdown","value":"XML"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()"},{"kind":1,"language":"markdown","value":"## Page labels"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:pageLabels()"},{"kind":1,"language":"markdown","value":"#  getText from page index"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfbox:getText($doc,56)"},{"kind":1,"language":"markdown","value":"# Page scraping"},{"kind":1,"language":"markdown","value":"## pdf scrape text analysis"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)\r\n"},{"kind":1,"language":"markdown","value":"## Inverted pageno map"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)=>pdfscrape:inverted-map()"},{"kind":1,"language":"markdown","value":"# Save 
images"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageBufferedImage(99,1)\r\n=>pdfbox:imageSave(\"c:\\tmp\\page3.png\",\"png\")\r\n"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageBufferedImage(3,0.25)\r\n=>pdfbox:imageBinary(\"jpg\")"},{"kind":1,"language":"markdown","value":"## report"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nlet $doc:=pdfbox:open(file:resolve-path($f,$a))\r\n(: let $outline:=pdfbox:outline($doc) :)\r\nlet $count:=pdfbox:page-count($doc)\r\norder by $count \r\nreturn ``[`{$f}`: `{ $count }`]``"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f at $pos in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nreturn pdfbox:open(file:resolve-path($f,$a))\r\n=> pdfbox:pageAsImage(0,0.25)\r\n=> pdfbox:imageSave(``[c:\\tmp\\titles\\p`{$pos}`.gif]``,\"gif\")"}]}
+{"cells":[{"kind":1,"language":"markdown","value":"# PDFBox3 \r\nA BaseX 10+ interface to Apache PDFBox® library version 3 \r\n## Apache PDFBox® - A Java PDF Library\r\n\r\nThe Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.\r\nhttps://pdfbox.apache.org/"},{"kind":1,"language":"markdown","value":"It comes with the useful PDF debug tool `java -jar debugger-app-3.0.2.jar`"},{"kind":1,"language":"markdown","value":"## Set up XQuery context for following code..."},{"kind":2,"language":"xquery","value":"(:<:)(: XQuery Context :)\r\nimport module namespace pdfbox = \"urn:expkg-zone58:pdfbox3\" at \"../src/lib/pdfbox3.xqm\";\r\nimport module namespace bookpages = 'urn:bookpages'  at \"../src/lib/bookpages.xqm\";\r\nimport module namespace pdfscrape = 'urn:pdfscrape'  at \"../src/lib/pdfscrape.xqm\";\r\nimport module namespace config = 'urn:abc-clio:config' at 'C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO/lib/abc-config.xqm';\r\n\r\ndeclare variable $samples:= map{\r\n    \"climate\":  \"drop-01d\\set\\2-6-1\\A5579C_1\\271989---Book_File-Web_PDF_9798400627484_486728.pdf\",\r\n    \"women\":    \"drop-01d\\set\\2-6-1\\A6229C_1\\257334---Book_File-Web_PDF_9798216172628_486742.pdf\",\r\n    \"genocide\": \"drop1-pdf\\GR2967-TRD\\272791---Book_File-Web_PDF_9798400640216_486366.pdf\",\r\n    \"world\":    \"drop-01c\\gpg-book\\2-6\\A3506C-TRD\\256186---Book_File-Web_PDF_9798216038955_486148.pdf\"\r\n};\r\ndeclare variable $PDF:= (: $samples?women=>file:resolve-path($config:data) :)\"C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO\\data\\drop-01e\\set\\2-6-1\\A5690C_1\\257107---Book_File-Web_PDF_9798400691218_486731.pdf\"; 
\r\n"},{"kind":1,"language":"markdown","value":" ## Check pdfbox version"},{"kind":2,"language":"xquery","value":"pdfbox:version()"},{"kind":1,"language":"markdown","value":"PDF specification version used by document"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:pdfVersion()"},{"kind":1,"language":"markdown","value":"# Page count for PDF"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:page-count()"},{"kind":1,"language":"markdown","value":"# save range to new pdf"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:extract(2,12,\"c:\\tmp\\a.pdf\")"},{"kind":1,"language":"markdown","value":"## Outline / bookmarks"},{"kind":1,"language":"markdown","value":"### sequence of maps"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:outline()"},{"kind":1,"language":"markdown","value":"XML"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()"},{"kind":1,"language":"markdown","value":"## Page labels"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:pageLabels()"},{"kind":1,"language":"markdown","value":"#  getText from page index"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfbox:getText($doc,56)"},{"kind":1,"language":"markdown","value":"# Page scraping"},{"kind":1,"language":"markdown","value":"## pdf scrape text analysis"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)\r\n"},{"kind":1,"language":"markdown","value":"## Inverted pageno map"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)=>pdfscrape:inverted-map()"},{"kind":1,"language":"markdown","value":"# Save images"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageBufferedImage(99,1)\r\n=>pdfbox:imageSave(\"c:\\tmp\\page3.png\",\"png\")\r\n"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> 
pdfbox:pageBufferedImage(3,0.25)\r\n=>pdfbox:imageBinary(\"jpg\")"},{"kind":1,"language":"markdown","value":"## report"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nlet $doc:=pdfbox:open(file:resolve-path($f,$a))\r\n(: let $outline:=pdfbox:outline($doc) :)\r\nlet $count:=pdfbox:page-count($doc)\r\norder by $count \r\nreturn ``[`{$f}`: `{ $count }`]``"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f at $pos in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nreturn pdfbox:open(file:resolve-path($f,$a))\r\n=> pdfbox:pageAsImage(0,0.25)\r\n=> pdfbox:imageSave(``[c:\\tmp\\titles\\p`{$pos}`.gif]``,\"gif\")"}]}
--- a/lib/pdfbox-3.0.3/fontbox-3.0.3.jar
+++ b/lib/pdfbox-3.0.3/fontbox-3.0.3.jar
--- a/lib/pdfbox-3.0.3/pdfbox-io-3.0.3.jar
+++ b/lib/pdfbox-3.0.3/pdfbox-io-3.0.3.jar
--- a/jars/loader.xqm
+++ b/jars/loader.xqm
@ -0,0 +1,272 @@
+xquery version '3.1';
+(:~ 
+pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library, 
+requires pdfbox jar on classpath
+PDFBox 3.0.2+ required; tested with pdfbox-app-3.0.2.jar
+@see download https://pdfbox.apache.org/download.cgi
+@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/
+
+:)
+module namespace pdfbox="org.apache.pdfbox.Loader";
+
+declare namespace Loader ="java:org.apache.pdfbox.Loader"; 
+declare namespace PDFTextStripper = "java:org.apache.pdfbox.text.PDFTextStripper";
+
+(:~ 
+@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDDocument.html 
+:)
+declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument";
+
+declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog";
+declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels";
+
+(:~ 
+@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/multipdf/PageExtractor.html 
+:)
+declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor";
+ 
+(:~ 
+ @see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDPageTree.html
+:)
+declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree";
+
+(:~ 
+@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html 
+:)
+declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline";
+
+declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation";
+(:~ 
+@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html 
+:)
+declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem";
+declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
+declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
+declare namespace File ="java:java.io.File";
+
+(:~ Version of the PDFBox library, as reported by org.apache.pdfbox.util.Version. :)
+declare function pdfbox:version() as xs:string
+{
+  Q{java:org.apache.pdfbox.util.Version}getVersion()
+};
+
+(:~ Load the PDF file at $pdfpath; the result is the Java document handle the other functions take as $doc. :)
+declare function pdfbox:open($pdfpath as xs:string) as item()
+{
+  RandomAccessReadBufferedFile:new($pdfpath) => Loader:loadPDF()
+};
+
+(:~ PDF specification version that $doc conforms to (PDDocument.getVersion). :)
+declare function pdfbox:pdfVersion($doc as item()) as xs:float
+{
+  $doc => PDDocument:getVersion()
+};
+
+(:~ Write $doc to the file $savepath; the result is $savepath itself. :)
+declare function pdfbox:save($doc as item(),$savepath as xs:string) as xs:string
+{
+  ( $doc => PDDocument:save(File:new($savepath)), $savepath )
+};
+
+(:~ Close $doc. :)
+declare function pdfbox:close($doc as item()) as empty-sequence()
+{
+  (: db:wrapjava void: whatever the Java call returns is ignored :)
+  (# db:wrapjava void #) { $doc => PDDocument:close() }
+};
+
+(:~ Number of pages in $doc. :)
+declare function pdfbox:page-count($doc as item()) as xs:integer {
+  $doc => PDDocument:getNumberOfPages()
+};
+
+(:~ map with document metadata; an entry is empty when the PDF does not set that field :)
+declare function pdfbox:information($doc as item()) as map(*){
+  let $info:=PDDocument:getDocumentInformation($doc)
+  return map{
+    "title": PDDocumentInformation:getTitle($info),
+    "creator": PDDocumentInformation:getCreator($info),
+    "producer": PDDocumentInformation:getProducer($info),
+    "subject": PDDocumentInformation:getSubject($info),
+    "keywords": PDDocumentInformation:getKeywords($info),
+    (: getCreationDate is null (empty) for PDFs without a date; `!` then skips the conversion :)
+    "creationdate": PDDocumentInformation:getCreationDate($info) ! pdfbox:gregToISO(.),
+    "author": PDDocumentInformation:getAuthor($info)
+  }
+};
+
+ (:~ string form of the java.time.ZonedDateTime for a java.util.GregorianCalendar $item;
+     returns the empty sequence when $item is empty (Java null, e.g. a PDF without a creation date) :)
+declare function pdfbox:gregToISO($item as item()?)
+as xs:string?{
+ if(exists($item)) then Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string() else ()
+};
+
+(:~ outline (bookmarks) of $doc as map()*, one map per top-level outline item :)
+declare function pdfbox:outline($doc as item())
+as map(*)*{
+  (# db:wrapjava some #) {
+  let $outline:=
+                PDDocument:getDocumentCatalog($doc)
+                =>PDDocumentCatalog:getDocumentOutline()
+ 
+  return  if(exists($outline)) (: no else branch: nothing is returned when $doc has no outline :)
+          then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) 
+  }
+};
+
+(:~ Bookmark maps for $outlineItem and each of its following siblings,
+    taken from the "list" entry of the map built by pdfbox:_outline. :)
+declare function pdfbox:outline($doc as item(),$outlineItem as item()?)
+as map(*)*
+{
+  pdfbox:_outline($doc ,$outlineItem)?list
+};
+
+(: Walks $outlineItem and its next siblings; returns a map whose "list" holds one bookmark map per item visited. Kept separate because: BaseX bug 10.7? error if inlined in outline :)
+declare function pdfbox:_outline($doc as item(),$outlineItem as item()?)
+as map(*){
+ hof:until(
+            function($output) { empty($output?this) }, (: stop once no sibling is left :)
+            function($input ) { 
+                      let $bk:= pdfbox:bookmark($input?this,$doc)
+                      let $bk:= if($bk?hasChildren) (: nested items are added under "children" :)
+                                then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this))
+                                     return map:merge(($bk,map:entry("children",$kids)))
+                                else $bk 
+                      return map{
+                            "list": ($input?list, $bk), (: bookmarks collected so far, in sibling order :)
+                            "this":  PDOutlineItem:getNextSibling($input?this)}
+                          },
+            map{"list":(),"this":$outlineItem} (: initial state: nothing collected, start at $outlineItem :)
+        ) 
+};
+(:~ The bookmark maps in $outline rendered as bookmark elements inside a single outline element. :)
+declare function pdfbox:outline-xml($outline as map(*)*) as element(outline)
+{
+  <outline>{
+    for $entry in $outline return pdfbox:bookmark-xml($entry)
+  }</outline>
+};
+
+(:~ One bookmark element (title and index attributes) per map in $outline, recursing into ?children. :)
+declare function pdfbox:bookmark-xml($outline as map(*)*) as element(bookmark)* {
+  for $bm in $outline
+  return element bookmark {
+    attribute title { $bm?title }, attribute index { $bm?index },
+    for $child in $bm?children return pdfbox:bookmark-xml($child)
+  }
+};
+
+(:~ map for the single outline item $bookmark: "index" (page index of its destination in $doc, empty if none), "title", "hasChildren" :)
+declare function pdfbox:bookmark($bookmark as item(),$doc as item())
+as map(*)
+{
+ map{ 
+  "index":  PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc),
+  "title":  (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("&#xFFFD;",""), (: drop U+FFFD replacement characters; a character reference keeps the literal safe from re-encoding :)
+  "hasChildren": PDOutlineItem:hasChildren($bookmark)
+  }
+};
+
+(:~ Page index in $document of the destination page of $page
+    ($page is handed to PDOutlineItem:findDestinationPage, i.e. it is an outline item). :)
+declare function pdfbox:outx($page ,$document)
+{
+  PDOutlineItem:findDestinationPage($page,$document) => pdfbox:pageIndex($document)
+};
+
+(:~ Index of $page within the page tree of $document; empty sequence when $page is empty. :)
+declare function pdfbox:pageIndex(
+   $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),
+   $document)
+as item()?
+{
+  if (empty($page)) then ()
+  else
+    let $pages := PDDocumentCatalog:getPages(PDDocument:getDocumentCatalog($document))
+    return PDPageTree:indexOf($pages, $page)
+};
+
+
+
+(:~ Copy the 1 based page range $start to $end of $doc into a new PDF saved at $target.
+@return $target, the path the new document was saved to :)
+declare function pdfbox:extract($doc as item(), 
+             $start as xs:integer,$end as xs:integer,$target as xs:string) as xs:string
+{
+    let $extractor:=PageExtractor:new($doc, $start, $end)
+    let $subset:=PageExtractor:extract($extractor)
+    return (pdfbox:save($subset,$target),pdfbox:close($subset)) 
+};
+
+
+(:~ page label descriptor (PDPageLabels) of $doc's catalog; empty when the document defines no page labels
+@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples
+@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files
+:)
+declare function pdfbox:getPageLabels($doc as item())
+as item()? (: optional: PDDocumentCatalog.getPageLabels() yields null when there are no labels :)
+{
+  PDDocument:getDocumentCatalog($doc)
+  =>PDDocumentCatalog:getPageLabels()
+};
+
+(:~ page label for every page of $doc; empty when the document defines no page labels :)
+declare function pdfbox:pageLabels($doc as item())
+as xs:string*
+{
+  let $labels:=PDDocument:getDocumentCatalog($doc)=>PDDocumentCatalog:getPageLabels()
+  (: getPageLabels() is null (empty) for PDFs without labels; invoking a method on it would fail :)
+  return if(exists($labels)) then PDPageLabels:getLabelsByPageIndices($labels) else ()
+};
+
+(:~ return text of the single page $pageNo of $doc. NOTE(review): PDFTextStripper page numbers are presumably 1 based, unlike the ZERO based $pageNo of pdfbox:pageBufferedImage; confirm :)
+declare function pdfbox:getText($doc as item(), $pageNo as xs:integer)
+as xs:string{
+  let $tStripper := (# db:wrapjava instance #) { (: pragma lets the setter calls below be chained on the stripper object :)
+         PDFTextStripper:new()
+         => PDFTextStripper:setStartPage($pageNo)
+         => PDFTextStripper:setEndPage($pageNo) (: start = end: exactly one page is extracted :)
+       }
+  return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
+};
+
+(:~ summary info as map for $pdfpath: file, page count, number of top-level outline items plus the document metadata :)
+declare function pdfbox:report($pdfpath as xs:string) as map(*){
+ let $doc:=pdfbox:open($pdfpath)
+ let $report:=map:merge((
+       map{ "file":  $pdfpath,
+            "pages": pdfbox:page-count($doc),
+            "outline": pdfbox:outline($doc)=>count() },
+       pdfbox:information($doc)))
+ (: release the document opened above; same result-then-close sequence idiom as pdfbox:extract :)
+ return ($report, pdfbox:close($doc))
+};
+
+(:~ Render one page of $doc as an image at $scale times 72 dpi.
+@param $pageNo page to render (ZERO based)
+@param $scale 1 = 72 dpi
+@return Java java.awt.image.BufferedImage object :)
+declare function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float)
+as item(){
+ let $renderer:=PDFRenderer:new($doc)
+ return PDFRenderer:renderImage($renderer,$pageNo,$scale)
+};
+
+(:~ Write $bufferedImage to the file $dest; the result is the boolean returned by ImageIO.write.
+@param $type image format name: "gif","png" etc :)
+declare function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string) as xs:boolean {
+  let $target:=File:new($dest)
+  return Q{java:javax.imageio.ImageIO}write($bufferedImage, $type, $target)
+};
+
+(:~ return $bufferedImage encoded in image format $type as xs:base64Binary
+@param $type = "gif","png" etc:)
+declare function pdfbox:imageBinary($bufferedImage as item(),$type as xs:string)
+as xs:base64Binary{
+  let $bytes:=Q{java:java.io.ByteArrayOutputStream}new() (: in-memory target for the encoded image :)
+  let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type,  $bytes) (: bound only for its side effect on $bytes :)
+  return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes)
+         =>convert:integers-to-base64()
+};
--- a/lib/pdfbox-3.0.3/pdfbox-3.0.3.jar
+++ b/lib/pdfbox-3.0.3/pdfbox-3.0.3.jar
--- a/lib/pdfbox-3.0.3.fat.jar
+++ b/lib/pdfbox-3.0.3.fat.jar
--- a/scripts/build.xqm
+++ b/scripts/build.xqm
@ -0,0 +1,51 @@
+(:~ build utils for REPO packaging :)
+module namespace build = 'urn:quodatum:build1';
+
+(:~ create a flat fat jar: the entries of every jar in $input-dir merged into one zip archive;
+    META-INF/ entries are kept from $manifest-jar only :)
+declare function build:fatjar-from-folder($input-dir as xs:string,$manifest-jar as xs:string)
+as xs:base64Binary { 
+  (: one map of entry names and matching binaries per jar, in file:list order :)
+  let $parts :=
+    for $jar in file:list($input-dir, false(), "*.jar")
+    let $bin := file:read-binary($input-dir || $jar)
+    let $keep := archive:entries($bin)/string()
+                 [$jar eq $manifest-jar or not(starts-with( .,"META-INF/"))]
+    return map { "name" : $keep,
+                 "content" : archive:extract-binary($bin, $keep) }
+  (: lookups on the sequence of maps concatenate names and contents in the same order :)
+  return
+    archive:create($parts?name, $parts?content,
+                   map { "format" : "zip", "algorithm" : "deflate" }) 
+};
+
+(:~ create a fat jar: all entries of $manifest-jar plus every jar found in $input-dir/lib, stored under lib/ :)
+declare function build:fatjar-with-lib($input-dir as xs:string,$manifest-jar as xs:string)
+as xs:base64Binary { 
+ let $bin :=file:read-binary($input-dir || $manifest-jar)
+ (: names of the entries that really exist inside the manifest jar :)
+ let $own:=archive:entries($bin)/string()
+ let $lib:=file:list($input-dir || "lib/", false(), "*.jar")!concat("lib/",.)
+ (: extract $own only: the lib/ names are files on disk, not entries of $bin :)
+ let  $content:=(archive:extract-binary($bin,$own)
+                ,$lib!file:read-binary($input-dir || .))
+return  archive:create(($own,$lib), $content,
+                   map { "format" : "zip", "algorithm" : "deflate" }) 
+};
+
+(:~ Write META-INF/MANIFEST.MF in $jar as a minimal manifest whose Main-Class is $main-class. :)
+declare function build:update-manifest($jar  as xs:base64Binary,$main-class as xs:string)
+as xs:base64Binary{
+  (: the existing manifest is not consulted:
+     let $mf:=archive:extract-text($jar,"META-INF/MANIFEST.MF") :)
+  let $crlf := "&#xD;&#xA;"
+  let $manifest := "Manifest-Version: 1.0" || $crlf || "Main-Class: " || $main-class
+                   || $crlf || $crlf
+  return archive:update($jar,"META-INF/MANIFEST.MF",$manifest)
+};
+
+(:~ Add or replace the entry $name in $jar; the string $file is used as the entry's content, not as a path. :)
+declare function build:update($jar as xs:base64Binary,$name  as xs:string,$file as xs:string) as xs:base64Binary
+{
+  $jar => archive:update($name,$file)
+};
--- a/scripts/make-fat-jar.xq
+++ b/scripts/make-fat-jar.xq
@ -0,0 +1,23 @@
+
+import module namespace build = 'urn:quodatum:build1' at 'build.xqm';
+
+(: Main execution: builds the fat jar and embeds loader.xqm at the main class path
+   (org/apache/pdfbox/Loader.xqm). input-dir is resolved against this script's folder,
+   not a machine-specific absolute path. :)
+let $config :=map { 
+         "manifest-jar" : "pdfbox-3.0.3.jar", 
+         "input-dir" :  file:resolve-path("../jars/", file:base-dir()), 
+         "output" :  "../lib/pdfbox-3.0.3.fat.jar",
+         "main-class": "org.apache.pdfbox.Loader" 
+         }
+
+let $fat-jar := build:fatjar-with-lib($config?input-dir,$config?manifest-jar)
+
+let $fat-jar:=build:update-manifest($fat-jar, $config?main-class)
+let $name:=replace($config?main-class,"\.","/") || ".xqm"
+let $content:=file:read-binary($config?input-dir || "loader.xqm")
+let $fat-jar:=archive:update($fat-jar, $name,$content)
+let $output-file := file:resolve-path($config?output, $config?input-dir)
+return (file:write-binary($output-file, $fat-jar),
+        trace($output-file,"fat jar: "))
+  
--- a/src/test/test.xqm
+++ b/src/test/test.xqm
@ -2,7 +2,7 @@

 :)
 module namespace test="urn:expkg-zone58:pdfbox3:tests";
-import module namespace pdfbox="urn:expkg-zone58:pdfbox3" at "../lib/pdfbox3.xqm";
+import module namespace pdfbox="org.apache.pdfbox.Loader";

 declare variable $test:base:=file:base-dir()=>file:parent()=>file:parent();