1
0
Fork 0

[fix] combined packaging

This commit is contained in:
Andy Bunce 2025-01-24 11:10:24 +00:00
parent 4bcfaefcc0
commit 1fe7edc1f7
10 changed files with 362 additions and 10 deletions

View file

@ -10,17 +10,23 @@ on:
jobs:
test:
runs-on: basex-10.7
test:
runs-on: ubuntu-latest
steps:
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: 18
- name: Checkout repository
uses: actions/checkout@v2
- name: Set up Java
uses: actions/setup-java@v2
with:
java-version: '11'
- name: Install BaseX
run: |
wget http://files.basex.org/releases/9.6.3/BaseX963.zip
unzip BaseX963.zip -d basex
- name: Run BaseX Tests
run: 'basex/bin/basex -t .'
run: |
./basex/bin/basex -c"RUN tests/test.bxs"

View file

@ -1 +1 @@
{"cells":[{"kind":1,"language":"markdown","value":"# PDFBox3 \r\nA BaseX 10+ interface to Apache PDFBox® library version 3 \r\n## Apache PDFBox® - A Java PDF Library\r\n\r\nThe Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.\r\nhttps://pdfbox.apache.org/"},{"kind":1,"language":"markdown","value":"It comes with the useful PDF debug tool `java -jar debugger-app-3.0.2.jar`"},{"kind":1,"language":"markdown","value":"## Set up XQuery context for following code..."},{"kind":2,"language":"xquery","value":"(:<:)(: XQuery Context :)\r\nimport module namespace pdfbox = \"urn:expkg-zone58:pdfbox3\" at \"../src/lib/pdfbox3.xqm\";\r\nimport module namespace bookpages = 'urn:bookpages' at \"../src/lib/bookpages.xqm\";\r\nimport module namespace pdfscrape = 'urn:pdfscrape' at \"../src/lib/pdfscrape.xqm\";\r\nimport module namespace config = 'urn:abc-clio:config' at 'C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO/lib/abc-config.xqm';\r\n\r\ndeclare variable $samples:= map{\r\n \"climate\": \"drop-01d\\set\\2-6-1\\A5579C_1\\271989---Book_File-Web_PDF_9798400627484_486728.pdf\",\r\n \"women\": \"drop-01d\\set\\2-6-1\\A6229C_1\\257334---Book_File-Web_PDF_9798216172628_486742.pdf\",\r\n \"genocide\": \"drop1-pdf\\GR2967-TRD\\272791---Book_File-Web_PDF_9798400640216_486366.pdf\",\r\n \"world\": \"drop-01c\\gpg-book\\2-6\\A3506C-TRD\\256186---Book_File-Web_PDF_9798216038955_486148.pdf\"\r\n};\r\ndeclare variable $PDF:= (: $samples?women=>file:resolve-path($config:data) 
:)\r\n\"C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO\\data\\drop-01e\\set\\2-6-1\\A5690C_1\\257107---Book_File-Web_PDF_9798400691218_486731.pdf\";"},{"kind":1,"language":"markdown","value":" ## Check pdfbox version"},{"kind":2,"language":"xquery","value":"pdfbox:version()"},{"kind":1,"language":"markdown","value":"PDF specification version used by document"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:pdfVersion()"},{"kind":1,"language":"markdown","value":"# Page count for PDF"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:page-count()"},{"kind":1,"language":"markdown","value":"# save range to new pdf"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:extract(2,12,\"c:\\tmp\\a.pdf\")"},{"kind":1,"language":"markdown","value":"## Outline / bookmarks"},{"kind":1,"language":"markdown","value":"### sequence of maps"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:outline()"},{"kind":1,"language":"markdown","value":"XML"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()"},{"kind":1,"language":"markdown","value":"## Page labels"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:pageLabels()"},{"kind":1,"language":"markdown","value":"# getText from page index"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfbox:getText($doc,56)"},{"kind":1,"language":"markdown","value":"# Page scraping"},{"kind":1,"language":"markdown","value":"## pdf scrape text analysis"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)\r\n"},{"kind":1,"language":"markdown","value":"## Inverted pageno map"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)=>pdfscrape:inverted-map()"},{"kind":1,"language":"markdown","value":"# Save 
images"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageBufferedImage(99,1)\r\n=>pdfbox:imageSave(\"c:\\tmp\\page3.png\",\"png\")\r\n"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageBufferedImage(3,0.25)\r\n=>pdfbox:imageBinary(\"jpg\")"},{"kind":1,"language":"markdown","value":"## report"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nlet $doc:=pdfbox:open(file:resolve-path($f,$a))\r\n(: let $outline:=pdfbox:outline($doc) :)\r\nlet $count:=pdfbox:page-count($doc)\r\norder by $count \r\nreturn ``[`{$f}`: `{ $count }`]``"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f at $pos in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nreturn pdfbox:open(file:resolve-path($f,$a))\r\n=> pdfbox:pageAsImage(0,0.25)\r\n=> pdfbox:imageSave(``[c:\\tmp\\titles\\p`{$pos}`.gif]``,\"gif\")"}]}
{"cells":[{"kind":1,"language":"markdown","value":"# PDFBox3 \r\nA BaseX 10+ interface to Apache PDFBox® library version 3 \r\n## Apache PDFBox® - A Java PDF Library\r\n\r\nThe Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.\r\nhttps://pdfbox.apache.org/"},{"kind":1,"language":"markdown","value":"It comes with the useful PDF debug tool `java -jar debugger-app-3.0.2.jar`"},{"kind":1,"language":"markdown","value":"## Set up XQuery context for following code..."},{"kind":2,"language":"xquery","value":"(:<:)(: XQuery Context :)\r\nimport module namespace pdfbox = \"urn:expkg-zone58:pdfbox3\" at \"../src/lib/pdfbox3.xqm\";\r\nimport module namespace bookpages = 'urn:bookpages' at \"../src/lib/bookpages.xqm\";\r\nimport module namespace pdfscrape = 'urn:pdfscrape' at \"../src/lib/pdfscrape.xqm\";\r\nimport module namespace config = 'urn:abc-clio:config' at 'C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO/lib/abc-config.xqm';\r\n\r\ndeclare variable $samples:= map{\r\n \"climate\": \"drop-01d\\set\\2-6-1\\A5579C_1\\271989---Book_File-Web_PDF_9798400627484_486728.pdf\",\r\n \"women\": \"drop-01d\\set\\2-6-1\\A6229C_1\\257334---Book_File-Web_PDF_9798216172628_486742.pdf\",\r\n \"genocide\": \"drop1-pdf\\GR2967-TRD\\272791---Book_File-Web_PDF_9798400640216_486366.pdf\",\r\n \"world\": \"drop-01c\\gpg-book\\2-6\\A3506C-TRD\\256186---Book_File-Web_PDF_9798216038955_486148.pdf\"\r\n};\r\ndeclare variable $PDF:= (: $samples?women=>file:resolve-path($config:data) :)\"C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO\\data\\drop-01e\\set\\2-6-1\\A5690C_1\\257107---Book_File-Web_PDF_9798400691218_486731.pdf\"; 
\r\n"},{"kind":1,"language":"markdown","value":" ## Check pdfbox version"},{"kind":2,"language":"xquery","value":"pdfbox:version()"},{"kind":1,"language":"markdown","value":"PDF specification version used by document"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:pdfVersion()"},{"kind":1,"language":"markdown","value":"# Page count for PDF"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:page-count()"},{"kind":1,"language":"markdown","value":"# save range to new pdf"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:extract(2,12,\"c:\\tmp\\a.pdf\")"},{"kind":1,"language":"markdown","value":"## Outline / bookmarks"},{"kind":1,"language":"markdown","value":"### sequence of maps"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:outline()"},{"kind":1,"language":"markdown","value":"XML"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()"},{"kind":1,"language":"markdown","value":"## Page labels"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:pageLabels()"},{"kind":1,"language":"markdown","value":"# getText from page index"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfbox:getText($doc,56)"},{"kind":1,"language":"markdown","value":"# Page scraping"},{"kind":1,"language":"markdown","value":"## pdf scrape text analysis"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)\r\n"},{"kind":1,"language":"markdown","value":"## Inverted pageno map"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)=>pdfscrape:inverted-map()"},{"kind":1,"language":"markdown","value":"# Save images"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageBufferedImage(99,1)\r\n=>pdfbox:imageSave(\"c:\\tmp\\page3.png\",\"png\")\r\n"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> 
pdfbox:pageBufferedImage(3,0.25)\r\n=>pdfbox:imageBinary(\"jpg\")"},{"kind":1,"language":"markdown","value":"## report"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nlet $doc:=pdfbox:open(file:resolve-path($f,$a))\r\n(: let $outline:=pdfbox:outline($doc) :)\r\nlet $count:=pdfbox:page-count($doc)\r\norder by $count \r\nreturn ``[`{$f}`: `{ $count }`]``"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f at $pos in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nreturn pdfbox:open(file:resolve-path($f,$a))\r\n=> pdfbox:pageAsImage(0,0.25)\r\n=> pdfbox:imageSave(``[c:\\tmp\\titles\\p`{$pos}`.gif]``,\"gif\")"}]}

272
jars/loader.xqm Normal file
View file

@ -0,0 +1,272 @@
xquery version '3.1';
(:~
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
requires pdfbox jar on classpath
PDFBox 3.0.2+ required; tested with pdfbox-app-3.0.2.jar
@see download https://pdfbox.apache.org/download.cgi
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/
:)
module namespace pdfbox="org.apache.pdfbox.Loader";
declare namespace Loader ="java:org.apache.pdfbox.Loader";
declare namespace PDFTextStripper = "java:org.apache.pdfbox.text.PDFTextStripper";
(:~
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDDocument.html
:)
declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument";
declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog";
declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels";
(:~
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/multipdf/PageExtractor.html
:)
declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor";
(:~
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDPageTree.html
:)
declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree";
(:~
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html
:)
declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline";
declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation";
(:~
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html
:)
declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem";
declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
declare namespace File ="java:java.io.File";
(:~ Report the version of the PDFBox library that is loaded.
 : @return the string returned by org.apache.pdfbox.util.Version.getVersion()
 :)
declare function pdfbox:version()
as xs:string{
  let $version := Q{java:org.apache.pdfbox.util.Version}getVersion()
  return $version
};
(:~ Load the PDF stored at $pdfpath.
 : @param $pdfpath file system path of the PDF
 : @return the Java document object produced by Loader.loadPDF, used as handle by the other functions
 :)
declare function pdfbox:open($pdfpath as xs:string)
as item(){
  let $source := RandomAccessReadBufferedFile:new($pdfpath)
  return Loader:loadPDF($source)
};
(:~ The PDF specification version the document declares.
 : @param $doc document handle from pdfbox:open
 : @return result of PDDocument.getVersion()
 :)
declare function pdfbox:pdfVersion($doc as item())
as xs:float{
  $doc => PDDocument:getVersion()
};
(:~ Write $doc to the file $savepath.
 : @param $doc document handle
 : @param $savepath destination file path
 : @return $savepath, so the call can be chained
 :)
declare function pdfbox:save($doc as item(),$savepath as xs:string)
as xs:string{
  let $target := File:new($savepath)
  return (
    PDDocument:save($doc, $target),
    $savepath
  )
};
(:~ Close the document handle $doc via PDDocument.close().
 : @param $doc document handle
 : @return empty sequence
 :)
declare function pdfbox:close($doc as item())
as empty-sequence(){
  (# db:wrapjava void #) {
    $doc => PDDocument:close()
  }
};
(:~ Number of pages in $doc, as reported by PDDocument.getNumberOfPages().
 : @param $doc document handle
 :)
declare function pdfbox:page-count($doc as item())
as xs:integer{
  $doc => PDDocument:getNumberOfPages()
};
(:~ Collect the document information dictionary of $doc into a map.
 : @param $doc document handle
 : @return map with keys title, creator, producer, subject, keywords, creationdate, author
 :)
declare function pdfbox:information($doc as item())
as map(*){
  let $meta := $doc => PDDocument:getDocumentInformation()
  return map{
    "title":        $meta => PDDocumentInformation:getTitle(),
    "creator":      $meta => PDDocumentInformation:getCreator(),
    "producer":     $meta => PDDocumentInformation:getProducer(),
    "subject":      $meta => PDDocumentInformation:getSubject(),
    "keywords":     $meta => PDDocumentInformation:getKeywords(),
    "creationdate": $meta => PDDocumentInformation:getCreationDate() => pdfbox:gregToISO(),
    "author":       $meta => PDDocumentInformation:getAuthor()
  }
};
(:~ Convert a java.util.GregorianCalendar into the string form of its ZonedDateTime.
 : A Java null reaches XQuery as the empty sequence (for example when a PDF carries
 : no creation date), so the argument is optional: an empty input yields an empty
 : result instead of a type error.
 : @param $item calendar object, or empty
 : @return string value of GregorianCalendar.toZonedDateTime(), or empty
 :)
declare
function pdfbox:gregToISO($item as item()?)
as xs:string?{
  $item ! (Q{java:java.util.GregorianCalendar}toZonedDateTime(.) => string())
};
(:~ Outline (bookmarks) of $doc.
 : @param $doc document handle
 : @return one map per top-level bookmark; empty when the catalog has no document outline
 :)
declare function pdfbox:outline($doc as item())
as map(*)*{
  (# db:wrapjava some #) {
    let $catalog := PDDocument:getDocumentCatalog($doc)
    let $root := PDDocumentCatalog:getDocumentOutline($catalog)
    return
      if(empty($root))
      then ()
      else pdfbox:outline($doc, PDOutlineItem:getFirstChild($root))
  }
};
(:~ Bookmark maps for $outlineItem and the siblings that follow it.
 : @param $doc document handle
 : @param $outlineItem first outline item to report, may be empty
 : @return the "list" entry of the map built by pdfbox:_outline
 :)
declare function pdfbox:outline($doc as item(),$outlineItem as item()?)
as map(*)*{
  pdfbox:_outline($doc, $outlineItem)?list
};
(:~ Walk the sibling chain starting at $outlineItem and collect one bookmark map per item.
 : Kept as a separate function: the author noted an error in BaseX 10.7 (possible BaseX bug)
 : when this code was inlined in pdfbox:outline.
 : @param $doc document handle, passed on to pdfbox:bookmark and pdfbox:outline
 : @param $outlineItem first item to process; when empty the "list" entry stays empty
 : @return map with "list" (bookmark maps collected) and "this" (empty once the loop has ended)
 :)
declare function pdfbox:_outline($doc as item(),$outlineItem as item()?)
as map(*){
hof:until(
(: stop when there is no current item left :)
function($output) { empty($output?this) },
(: one step: describe the current item, then move on to its next sibling :)
function($input ) {
let $bk:= pdfbox:bookmark($input?this,$doc)
(: items flagged hasChildren get a "children" entry built from their first child :)
let $bk:= if($bk?hasChildren)
then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this))
return map:merge(($bk,map:entry("children",$kids)))
else $bk
return map{
"list": ($input?list, $bk),
"this": PDOutlineItem:getNextSibling($input?this)}
},
(: initial state: nothing collected yet, start at $outlineItem :)
map{"list":(),"this":$outlineItem}
)
};
(:~ Serialize an outline (sequence of bookmark maps) as XML.
 : @param $outline maps as returned by pdfbox:outline
 : @return outline element holding one bookmark element per map
 :)
declare function pdfbox:outline-xml($outline as map(*)*)
as element(outline){
  <outline>{
    for $entry in $outline
    return pdfbox:bookmark-xml($entry)
  }</outline>
};
(:~ Turn bookmark maps into bookmark elements, recursing into the "children" entry.
 : @param $outline bookmark maps with "title", "index" and optional "children"
 : @return one bookmark element per map, with title and index attributes
 :)
declare function pdfbox:bookmark-xml($outline as map(*)*)
as element(bookmark)*
{
  for $entry in $outline
  return element bookmark {
    attribute title { $entry?title },
    attribute index { $entry?index },
    for $child in $entry?children
    return pdfbox:bookmark-xml($child)
  }
};
(:~ Describe a single outline item as a map.
 : @param $bookmark outline item (PDOutlineItem)
 : @param $doc document handle, needed to resolve the destination page
 : @return map with "index" (page index of the destination, empty if none),
 :         "title" and "hasChildren"
 :)
declare function pdfbox:bookmark($bookmark as item(),$doc as item())
as map(*)
{
  map{
    "index": PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc),
    (: NOTE(review): the db:checkstrings pragma presumably lets invalid characters through
       as U+FFFD - confirm against the BaseX options documentation. The replacement
       character is then removed from the title. It is written as a character reference
       so the literal survives encoding round-trips; the mangled literal previously here
       stripped the characters "<", "2" and ">" from titles. :)
    "title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("&#xFFFD;",""),
    "hasChildren": PDOutlineItem:hasChildren($bookmark)
  }
};
(:~ Page index of the destination page of outline item $page within $document.
 : @param $page outline item passed to PDOutlineItem.findDestinationPage
 : @param $document document handle
 :)
declare function pdfbox:outx($page ,$document)
{
  PDOutlineItem:findDestinationPage($page, $document)
  => pdfbox:pageIndex($document)
};
(:~ Index of $page in the page tree of $document.
 : @param $page page object (org.apache.pdfbox.pdmodel.PDPage), may be empty
 : @param $document document handle
 : @return result of PDPageTree.indexOf, or empty when $page is empty
 :)
declare function pdfbox:pageIndex(
  $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),
  $document)
as item()?
{
  if(empty($page))
  then ()
  else
    let $catalog := PDDocument:getDocumentCatalog($document)
    let $pages := PDDocumentCatalog:getPages($catalog)
    return PDPageTree:indexOf($pages, $page)
};
(:~ Copy a page range of $doc into a new PDF file.
 : @param $doc source document handle
 : @param $start first page of the range (1 based)
 : @param $end last page of the range (1 based)
 : @param $target path the new PDF is saved to
 : @return $target; the intermediate document is closed after saving
 :)
declare function pdfbox:extract($doc as item(),
  $start as xs:integer,$end as xs:integer,$target as xs:string)
as xs:string
{
  let $extractor := PageExtractor:new($doc, $start, $end)
  let $part := PageExtractor:extract($extractor)
  return (
    pdfbox:save($part, $target),
    pdfbox:close($part)
  )
};
(:~ Page label descriptor (PDPageLabels) of $doc.
 : The result is optional: a Java null from PDDocumentCatalog.getPageLabels()
 : (document without page labels) arrives as the empty sequence, which the
 : former item() return type rejected with a type error.
 : @param $doc document handle
 : @return the PDPageLabels object, or empty if the document defines none
 : @see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples
 : @see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files
 :)
declare function pdfbox:getPageLabels($doc as item())
as item()?
{
  PDDocument:getDocumentCatalog($doc)
  =>PDDocumentCatalog:getPageLabels()
};
(:~ Page label of every page, in page index order.
 : @param $doc document handle
 : @return result of PDPageLabels.getLabelsByPageIndices(); empty when the
 :         document has no page label descriptor
 :)
declare function pdfbox:pageLabels($doc as item())
as xs:string*
{
  let $labels := PDDocument:getDocumentCatalog($doc)
                 => PDDocumentCatalog:getPageLabels()
  (: the map operator skips the call when getPageLabels() returned null (empty) :)
  return $labels ! PDPageLabels:getLabelsByPageIndices(.)
};
(:~ Text of a single page of $doc.
 : @param $doc document handle
 : @param $pageNo page number, used as both start and end page of the PDFTextStripper
 : @return text produced by PDFTextStripper.getText
 :)
declare function pdfbox:getText($doc as item(), $pageNo as xs:integer)
as xs:string{
  let $stripper := (# db:wrapjava instance #) {
    let $fresh := PDFTextStripper:new()
    let $from := PDFTextStripper:setStartPage($fresh, $pageNo)
    return PDFTextStripper:setEndPage($from, $pageNo)
  }
  return (# db:checkstrings #) { $stripper => PDFTextStripper:getText($doc) }
};
(:~ Summary of the PDF at $pdfpath.
 : @param $pdfpath file system path of the PDF
 : @return the entries of pdfbox:information plus "file" (the path), "pages"
 :         (page count) and "outline" (number of top-level bookmarks)
 :)
declare function pdfbox:report($pdfpath as xs:string)
as map(*){
  let $doc := pdfbox:open($pdfpath)
  let $summary := (
      map{
        "file": $pdfpath,
        "pages": pdfbox:page-count($doc),
        "outline": pdfbox:outline($doc)=>count()
      },
      pdfbox:information($doc)
    ) => map:merge()
  (: release the document opened above once the summary is built; pdfbox:close
     returns the empty sequence, so the result is still the single map :)
  return ($summary, pdfbox:close($doc))
};
(:~ Render one page of $doc as an image.
 : @param $doc document handle
 : @param $pageNo page index (ZERO based)
 : @param $scale scaling factor, 1 = 72 dpi
 : @return Java java.awt.image.BufferedImage object
 :)
declare function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float)
as item(){
  let $renderer := PDFRenderer:new($doc)
  return PDFRenderer:renderImage($renderer, $pageNo, $scale)
};
(:~ Write a buffered image to the file $dest.
 : @param $bufferedImage image object, e.g. from pdfbox:pageBufferedImage
 : @param $dest destination file path
 : @param $type image format name: "gif", "png" etc
 : @return the boolean returned by javax.imageio.ImageIO.write
 :)
declare function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string)
as xs:boolean{
  let $file := File:new($dest)
  return Q{java:javax.imageio.ImageIO}write($bufferedImage, $type, $file)
};
(:~ Encode a buffered image in memory and return it as binary.
 : @param $bufferedImage image object, e.g. from pdfbox:pageBufferedImage
 : @param $type image format name: "gif", "png" etc
 : @return the bytes written by ImageIO, passed through convert:integers-to-base64
 :)
declare function pdfbox:imageBinary($bufferedImage as item(),$type as xs:string)
as xs:base64Binary{
  let $buffer := Q{java:java.io.ByteArrayOutputStream}new()
  (: bound only for its side effect of filling $buffer :)
  let $written := Q{java:javax.imageio.ImageIO}write($bufferedImage, $type, $buffer)
  return convert:integers-to-base64(
    Q{java:java.io.ByteArrayOutputStream}toByteArray($buffer)
  )
};

BIN
lib/pdfbox-3.0.3.fat.jar Normal file

Binary file not shown.

51
scripts/build.xqm Normal file
View file

@ -0,0 +1,51 @@
(:~ build utils for REPO packaging :)
module namespace build = 'urn:quodatum:build1';
(:~ Create a flat fat jar: the entries of every *.jar directly inside $input-dir
 : merged into a single zip archive.
 : Entries under META-INF/ are kept only from $manifest-jar.
 : @param $input-dir folder holding the jars; it is concatenated directly with each
 :        file name, so it must end with a directory separator
 : @param $manifest-jar file name of the jar whose META-INF/ entries are kept
 : @return the new archive
 :)
declare function build:fatjar-from-folder($input-dir as xs:string,$manifest-jar as xs:string)
as xs:base64Binary {
  let $parts :=
    for $jar in file:list($input-dir, false(), "*.jar")
    let $archive := file:read-binary($input-dir || $jar)
    let $keep := archive:entries($archive)/string()
                 [$jar eq $manifest-jar or not(starts-with(., "META-INF/"))]
    return map {
      "name" : $keep,
      "content" : archive:extract-binary($archive, $keep)
    }
  return archive:create(
    $parts?name,
    $parts?content,
    map { "format" : "zip", "algorithm" : "deflate" }
  )
};
(:~ Create a fat jar that keeps its dependencies as nested jars: all entries of
 : $manifest-jar plus every *.jar found in $input-dir/lib/, stored under "lib/".
 : @param $input-dir folder holding $manifest-jar and the lib/ folder; concatenated
 :        directly with file names, so it must end with a directory separator
 : @param $manifest-jar file name of the jar that supplies the base entries
 : @return the new zip archive
 :)
declare function build:fatjar-with-lib($input-dir as xs:string,$manifest-jar as xs:string)
{
  let $base := file:read-binary($input-dir || $manifest-jar)
  let $base-names := archive:entries($base)/string()
  let $lib-names :=
    for $jar in file:list($input-dir || "lib/", false(), "*.jar")
    return "lib/" || $jar
  let $lib-content :=
    for $entry in $lib-names
    return file:read-binary($input-dir || $entry)
  return archive:create(
    ($base-names, $lib-names),
    (archive:extract-binary($base, $base-names), $lib-content),
    map { "format" : "zip", "algorithm" : "deflate" }
  )
};
(:~ Replace META-INF/MANIFEST.MF in $jar with a minimal manifest.
 : The manifest holds only Manifest-Version and Main-Class, uses CRLF line ends
 : and finishes with an empty line.
 : @param $jar archive to update
 : @param $main-class value written to the Main-Class header
 : @return the updated archive
 :)
declare function build:update-manifest($jar as xs:base64Binary,$main-class as xs:string)
as xs:base64Binary{
  let $manifest :=
    "Manifest-Version: 1.0&#xD;&#xA;Main-Class: "
    || $main-class
    || "&#xD;&#xA;&#xD;&#xA;"
  return $jar => archive:update("META-INF/MANIFEST.MF", $manifest)
};
(:~ Add or replace a single entry of $jar.
 : @param $jar archive to update
 : @param $name entry name inside the archive
 : @param $file content stored for the entry, passed unchanged to archive:update
 : @return the updated archive
 :)
declare function build:update($jar as xs:base64Binary,$name as xs:string,$file as xs:string)
as xs:base64Binary{
  $jar => archive:update($name, $file)
};

23
scripts/make-fat-jar.xq Normal file
View file

@ -0,0 +1,23 @@
import module namespace build = 'urn:quodatum:build1' at 'build.xqm';
(: Main execution
   Builds the combined package jar:
   - entries of the manifest jar plus the jars in <input-dir>/lib/
   - a minimal manifest with Main-Class: org.apache.pdfbox.Loader
   - loader.xqm stored under the main-class path (org/apache/pdfbox/Loader.xqm)
:)
let $config :=map {
  "manifest-jar" : "pdfbox-3.0.3.jar",
  (: the jars/ folder next to scripts/, resolved from this query's location instead
     of an absolute path on one developer machine; the trailing slash is required
     because file names are appended by plain concatenation :)
  "input-dir" : file:resolve-path("../jars/", file:base-dir()),
  "output" : "../lib/pdfbox-3.0.3.fat.jar",
  "main-class": "org.apache.pdfbox.Loader"
}
let $fat-jar := build:fatjar-with-lib($config?input-dir,$config?manifest-jar)
let $fat-jar:=build:update-manifest($fat-jar, $config?main-class)
(: the XQuery module is stored at the path matching the main class name :)
let $name:=replace($config?main-class,"\.","/") || ".xqm"
let $content:=file:read-binary($config?input-dir || "loader.xqm")
let $fat-jar:=archive:update($fat-jar, $name,$content)
(: output is given relative to the input folder :)
let $output-file := file:resolve-path($config?output, $config?input-dir)
return (file:write-binary($output-file, $fat-jar),
  trace($output-file,"fat jar: "))

View file

@ -2,7 +2,7 @@
:)
module namespace test="urn:expkg-zone58:pdfbox3:tests";
import module namespace pdfbox="urn:expkg-zone58:pdfbox3" at "../lib/pdfbox3.xqm";
import module namespace pdfbox="org.apache.pdfbox.Loader";
declare variable $test:base:=file:base-dir()=>file:parent()=>file:parent();