[mod] download
This commit is contained in:
parent
5bc6f02802
commit
a0cfa6d937
5 changed files with 110 additions and 80 deletions
|
@ -1,7 +1,9 @@
|
||||||
(:~ build utils for REPO packaging :)
|
(:~ build utils for REPO packaging :)
|
||||||
module namespace build = 'urn:quodatum:build1';
|
module namespace build = 'urn:quodatum:build1';
|
||||||
|
|
||||||
(:~ create a flat fat jar :)
|
(:~ create a flat fat jar from jars in $input-dir
|
||||||
|
keeping only META-INF from $manifest-jar
|
||||||
|
:)
|
||||||
declare function build:fatjar-from-folder($input-dir as xs:string,$manifest-jar as xs:string)
|
declare function build:fatjar-from-folder($input-dir as xs:string,$manifest-jar as xs:string)
|
||||||
as xs:base64Binary {
|
as xs:base64Binary {
|
||||||
let $fold :=
|
let $fold :=
|
||||||
|
@ -13,13 +15,16 @@ function ($res as map (*), $jar as xs:string) {
|
||||||
map { "name" : ($res? name, $paths),
|
map { "name" : ($res? name, $paths),
|
||||||
"content" : ($res? content,archive:extract-binary($bin, $paths)) }
|
"content" : ($res? content,archive:extract-binary($bin, $paths)) }
|
||||||
}
|
}
|
||||||
let $res := fold-left(file:list($input-dir, false(), "*.jar"), map { }, $fold)
|
let $res := file:list($input-dir, false(), "*.jar")
|
||||||
|
=>fold-left( map { }, $fold)
|
||||||
return
|
return
|
||||||
archive:create($res? name, $res? content,
|
archive:create($res? name, $res? content,
|
||||||
map { "format" : "zip", "algorithm" : "deflate" })
|
map { "format" : "zip", "algorithm" : "deflate" })
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ create a fat jar with lib :)
|
(:~ create a fat jar with lib
|
||||||
|
@remark
|
||||||
|
:)
|
||||||
declare function build:fatjar-with-lib($input-dir as xs:string,$manifest-jar as xs:string)
|
declare function build:fatjar-with-lib($input-dir as xs:string,$manifest-jar as xs:string)
|
||||||
{
|
{
|
||||||
let $bin :=file:read-binary($input-dir || $manifest-jar)
|
let $bin :=file:read-binary($input-dir || $manifest-jar)
|
||||||
|
@ -49,3 +54,14 @@ declare function build:update($jar as xs:base64Binary,$name as xs:string,$file
|
||||||
as xs:base64Binary{
|
as xs:base64Binary{
|
||||||
archive:update($jar,$name,$file)
|
archive:update($jar,$name,$file)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
(:~ base URI against which the entries passed to build:maven-download are resolved :)
declare variable $build:REPO as xs:string external :="https://repo1.maven.org/maven2/";

(:~
 : Download each entry of $urls into $destdir, skipping targets that already exist.
 : Each entry is resolved against $build:REPO with resolve-uri(), so an entry may be
 : a path relative to the repository base or an absolute URL.
 : The target file name is the part of the entry after its last "/".
 : @param $urls repository-relative paths or absolute URLs to fetch
 : @param $destdir target directory; a trailing separator is added if missing and
 :        the directory is created if it does not exist
 : @return empty sequence; the function is called for its side effects
 :)
declare function build:maven-download($urls as xs:string*,$destdir as xs:string)
as empty-sequence(){
  (: an empty $destdir is kept as-is so targets stay relative, as before :)
  let $dir := if ($destdir = "" or matches($destdir, "[/\\]$"))
              then $destdir
              else $destdir || file:dir-separator()
  return (
    (: make sure the target directory is there before the first write :)
    if ($dir = "") then () else file:create-dir($dir),
    for $f in $urls
    let $dest := $dir || replace($f,"^.*/","")
    where not(file:exists($dest))
    return file:write-binary($dest, fetch:binary(resolve-uri($f,$build:REPO)=>trace("Download: ")))
  )
};
|
|
@ -1,17 +0,0 @@
|
||||||
|
|
||||||
import module namespace build = 'urn:quodatum:build1' at 'build.xqm';
|
|
||||||
|
|
||||||
declare variable $files := (
|
|
||||||
"https://repo1.maven.org/maven2/org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar",
|
|
||||||
"https://repo1.maven.org/maven2/org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar",
|
|
||||||
"https://repo1.maven.org/maven2/org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar",
|
|
||||||
"https://repo1.maven.org/maven2/commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar"
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
let $base:= file:resolve-path("../",static-base-uri())
|
|
||||||
let $target:=file:resolve-path("jars/",$base)
|
|
||||||
for $f in $files
|
|
||||||
let $n:=replace($f,"^.*/","") =>trace("N")
|
|
||||||
return file:write-binary($target || $n, fetch:binary($f))
|
|
|
@ -1,6 +1,12 @@
|
||||||
|
|
||||||
import module namespace build = 'urn:quodatum:build1' at 'build.xqm';
|
import module namespace build = 'urn:quodatum:build1' at 'build.xqm';
|
||||||
|
|
||||||
|
declare variable $urls := (
|
||||||
|
"org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar",
|
||||||
|
"org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar",
|
||||||
|
"org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar",
|
||||||
|
"commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar"
|
||||||
|
);
|
||||||
(: Main execution
|
(: Main execution
|
||||||
Main-Class: org.basex.modules.Hello
|
Main-Class: org.basex.modules.Hello
|
||||||
:)
|
:)
|
||||||
|
@ -12,6 +18,7 @@ let $config :=map {
|
||||||
"main-class": "org.expkg_zone58.Pdfbox3"
|
"main-class": "org.expkg_zone58.Pdfbox3"
|
||||||
}
|
}
|
||||||
let $jar-path:=file:resolve-path($config?input-dir,$config?base=>trace("base "))=>trace("jar: ")
|
let $jar-path:=file:resolve-path($config?input-dir,$config?base=>trace("base "))=>trace("jar: ")
|
||||||
|
let $_:=build:maven-download($urls,$jar-path)
|
||||||
let $fat-jar := build:fatjar-from-folder($jar-path,$config?manifest-jar)
|
let $fat-jar := build:fatjar-from-folder($jar-path,$config?manifest-jar)
|
||||||
|
|
||||||
let $fat-jar:=build:update-manifest($fat-jar, $config?main-class)
|
let $fat-jar:=build:update-manifest($fat-jar, $config?main-class)
|
||||||
|
|
|
@ -49,10 +49,12 @@ as item(){
|
||||||
Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
|
Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ the version of the PDF specification used by $pdf :)
|
(:~ the version of the PDF specification used by $pdf e.g "1.4"
|
||||||
declare function pdfbox:pdfVersion($pdf as item())
|
returned as string to avoid rounding issues
|
||||||
as xs:float{
|
:)
|
||||||
PDDocument:getVersion($pdf)
|
declare function pdfbox:specification($pdf as item())
|
||||||
|
as xs:string{
|
||||||
|
PDDocument:getVersion($pdf)=>string()
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ save pdf $pdf to $savepath , returns $savepath :)
|
(:~ save pdf $pdf to $savepath , returns $savepath :)
|
||||||
|
@ -97,45 +99,49 @@ as xs:string{
|
||||||
Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string()
|
Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string()
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ outline for $doc as map()* :)
|
(:~ outline for $pdf as map()* :)
|
||||||
declare function pdfbox:outline($doc as item())
|
declare function pdfbox:outline($pdf as item())
|
||||||
as map(*)*{
|
as map(*)*{
|
||||||
(# db:wrapjava some #) {
|
(# db:wrapjava some #) {
|
||||||
let $outline:=
|
let $outline:=
|
||||||
PDDocument:getDocumentCatalog($doc)
|
PDDocument:getDocumentCatalog($pdf)
|
||||||
=>PDDocumentCatalog:getDocumentOutline()
|
=>PDDocumentCatalog:getDocumentOutline()
|
||||||
|
|
||||||
return if(exists($outline))
|
return if(exists($outline))
|
||||||
then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline))
|
then pdfbox:outline($pdf,PDOutlineItem:getFirstChild($outline))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ return bookmark info for children of $outlineItem as seq of maps :)
|
(:~ return bookmark info for children of $outlineItem as seq of maps :)
|
||||||
declare function pdfbox:outline($doc as item(),$outlineItem as item()?)
|
declare function pdfbox:outline($pdf as item(),$outlineItem as item()?)
|
||||||
|
|
||||||
as map(*)*{
|
as map(*)*{
|
||||||
let $find as map(*):=pdfbox:_outline($doc ,$outlineItem)
|
let $find as map(*):=pdfbox:_outline($pdf ,$outlineItem)
|
||||||
return map:get($find,"list")
|
return map:get($find,"list")
|
||||||
};
|
};
|
||||||
|
|
||||||
(: BaseX bug 10.7? error if inlined in outline :)
|
(: BaseX bug 10.7? error if inlined in outline :)
|
||||||
declare %private function pdfbox:_outline($doc as item(),$outlineItem as item()?)
|
declare %private function pdfbox:_outline($pdf as item(),$outlineItem as item()?)
|
||||||
as map(*){
|
as map(*){
|
||||||
hof:until(
|
pdfbox:do-until(
|
||||||
function($output) { empty($output?this) },
|
|
||||||
|
map{"list":(),"this":$outlineItem},
|
||||||
|
|
||||||
function($input ) {
|
function($input ) {
|
||||||
let $bk:= pdfbox:bookmark($input?this,$doc)
|
let $bk:= pdfbox:bookmark($input?this,$pdf)
|
||||||
let $bk:= if($bk?hasChildren)
|
let $bk:= if($bk?hasChildren)
|
||||||
then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this))
|
then let $kids:=pdfbox:outline($pdf,PDOutlineItem:getFirstChild($input?this))
|
||||||
return map:merge(($bk,map:entry("children",$kids)))
|
return map:merge(($bk,map:entry("children",$kids)))
|
||||||
else $bk
|
else $bk
|
||||||
return map{
|
return map{
|
||||||
"list": ($input?list, $bk),
|
"list": ($input?list, $bk),
|
||||||
"this": PDOutlineItem:getNextSibling($input?this)}
|
"this": PDOutlineItem:getNextSibling($input?this)}
|
||||||
},
|
},
|
||||||
map{"list":(),"this":$outlineItem}
|
|
||||||
|
function($output) { empty($output?this) }
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ outline as xml :)
|
(:~ outline as xml :)
|
||||||
declare function pdfbox:outline-xml($outline as map(*)*)
|
declare function pdfbox:outline-xml($outline as map(*)*)
|
||||||
as element(outline){
|
as element(outline){
|
||||||
|
@ -156,27 +162,22 @@ as element(bookmark)*
|
||||||
(:~ return bookmark info for children of $outlineItem
|
(:~ return bookmark info for children of $outlineItem
|
||||||
@return map like{index:,title:,hasChildren:}
|
@return map like{index:,title:,hasChildren:}
|
||||||
:)
|
:)
|
||||||
declare function pdfbox:bookmark($bookmark as item(),$doc as item())
|
declare function pdfbox:bookmark($bookmark as item(),$pdf as item())
|
||||||
as map(*)
|
as map(*)
|
||||||
{
|
{
|
||||||
map{
|
map{
|
||||||
"index": PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc),
|
"index": PDOutlineItem:findDestinationPage($bookmark,$pdf)=>pdfbox:page-index($pdf),
|
||||||
"title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("<22>",""),
|
"title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("<22>",""),
|
||||||
"hasChildren": PDOutlineItem:hasChildren($bookmark)
|
"hasChildren": PDOutlineItem:hasChildren($bookmark)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
declare function pdfbox:outx($page ,$document)
|
|
||||||
{
|
|
||||||
let $currentPage := PDOutlineItem:findDestinationPage($page,$document)
|
|
||||||
let $pageNumber := pdfbox:pageIndex($currentPage,$document)
|
|
||||||
return $pageNumber
|
|
||||||
};
|
|
||||||
|
|
||||||
(:~ pageIndex of $page in $pdf :)
|
(:~ pageIndex of $page in $pdf :)
|
||||||
declare function pdfbox:pageIndex(
|
declare function pdfbox:page-index(
|
||||||
$page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),
|
$page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),
|
||||||
$pdf)
|
$pdf as item())
|
||||||
as item()?
|
as item()?
|
||||||
{
|
{
|
||||||
if(exists($page))
|
if(exists($page))
|
||||||
|
@ -198,28 +199,20 @@ as xs:string
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
(:~ pageLabel info
|
(:~ pageLabel for every page
|
||||||
@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples
|
@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples
|
||||||
@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files
|
@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files
|
||||||
:)
|
:)
|
||||||
declare function pdfbox:getPageLabels($pdf as item())
|
declare function pdfbox:labels($pdf as item())
|
||||||
as item()
|
|
||||||
{
|
|
||||||
PDDocument:getDocumentCatalog($pdf)
|
|
||||||
=>PDDocumentCatalog:getPageLabels()
|
|
||||||
};
|
|
||||||
|
|
||||||
(:~ pageLabel for every page:)
|
|
||||||
declare function pdfbox:pageLabels($doc as item())
|
|
||||||
as xs:string*
|
as xs:string*
|
||||||
{
|
{
|
||||||
PDDocument:getDocumentCatalog($doc)
|
PDDocument:getDocumentCatalog($pdf)
|
||||||
=>PDDocumentCatalog:getPageLabels()
|
=>PDDocumentCatalog:getPageLabels()
|
||||||
=>PDPageLabels:getLabelsByPageIndices()
|
=>PDPageLabels:getLabelsByPageIndices()
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ return text on $pageNo :)
|
(:~ return text on $pageNo :)
|
||||||
declare function pdfbox:getText($doc as item(), $pageNo as xs:integer)
|
declare function pdfbox:page-text($doc as item(), $pageNo as xs:integer)
|
||||||
as xs:string{
|
as xs:string{
|
||||||
let $tStripper := (# db:wrapjava instance #) {
|
let $tStripper := (# db:wrapjava instance #) {
|
||||||
PDFTextStripper:new()
|
PDFTextStripper:new()
|
||||||
|
@ -246,9 +239,9 @@ as map(*){
|
||||||
@param $scale 1=72 dpi
|
@param $scale 1=72 dpi
|
||||||
@return Java java.awt.image.BufferedImage object
|
@return Java java.awt.image.BufferedImage object
|
||||||
:)
|
:)
|
||||||
declare function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float)
|
declare function pdfbox:pageBufferedImage($pdf as item(), $pageNo as xs:integer,$scale as xs:float)
|
||||||
as item(){
|
as item(){
|
||||||
PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale)
|
PDFRenderer:new($pdf)=>PDFRenderer:renderImage($pageNo,$scale)
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ save bufferedimage to $dest
|
(:~ save bufferedimage to $dest
|
||||||
|
@ -267,3 +260,20 @@ as xs:base64Binary{
|
||||||
return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes)
|
return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes)
|
||||||
=>convert:integers-to-base64()
|
=>convert:integers-to-base64()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
(:~
 : fn:do-until shim for BaseX 9+.
 : Delegates to the 3-argument fn:do-until when the processor provides it,
 : otherwise to hof:until from the BaseX hof module (note the different
 : argument order). Raises pdfbox:do-until when neither function is available.
 : NOTE(review): the two delegates may differ in whether $predicate is tested
 : before the first call of $action - confirm against their documentation.
 : @param $input initial value
 : @param $action function producing the next value from the current one
 : @param $predicate function that ends the iteration when it returns true
 : @return the value returned by the delegate
 :)
declare function pdfbox:do-until(
  $input as item()*,
  (: typed function(*) so the single-argument callbacks used in this module,
     and expected by hof:until, are accepted without arity coercion :)
  $action as function(*),
  $predicate as function(*)
) as item()*
{
  let $fn := function-lookup(QName('http://www.w3.org/2005/xpath-functions','do-until'), 3)
  return
    (: test with exists(): the effective boolean value of a function item is an error :)
    if (exists($fn))
    then $fn($input, $action, $predicate)
    else
      let $hof := function-lookup(QName('http://basex.org/modules/hof','until'), 3)
      return
        if (exists($hof))
        then $hof($predicate, $action, $input)
        else error(xs:QName('pdfbox:do-until'), "No implementation found")
};
|
||||||
|
|
|
@ -7,39 +7,52 @@ import module namespace pdfbox="org.expkg_zone58.Pdfbox3";
|
||||||
declare variable $test:base:=file:base-dir()=>file:parent()=>file:parent();
|
declare variable $test:base:=file:base-dir()=>file:parent()=>file:parent();
|
||||||
|
|
||||||
|
|
||||||
|
(:~ pdfbox:version() is expected to report "3.0.4"; the value is traced with the label "VER: " :)
declare %unit:test
function test:pdfbox-version(){
  unit:assert-equals(
    pdfbox:version() => trace("VER: "),
    "3.0.4"
  )
};
|
||||||
|
|
||||||
|
(:~
 : pdfbox:specification returns the PDF specification version as xs:string,
 : so the expected value must be a string too: a string never deep-equals a number.
 :)
declare %unit:test
function test:specification(){
  let $pdf:=test:pdf("samples.pdf/BaseX100.pdf")
  let $spec:=pdfbox:specification($pdf)
  return unit:assert-equals($spec,"1.4")
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:page-count(){
|
function test:page-count(){
|
||||||
let $PDF:="samples.pdf/BaseX100.pdf"=>test:resolve()
|
let $pdf:=test:pdf("samples.pdf/BaseX100.pdf")
|
||||||
let $pages:=pdfbox:open($PDF)=>pdfbox:page-count()
|
let $pages:=pdfbox:page-count($pdf)
|
||||||
return unit:assert-equals($pages,521)
|
return unit:assert-equals($pages,521)
|
||||||
};
|
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:outline-none(){
|
function test:outline-none(){
|
||||||
let $PDF:="samples.pdf/BaseX100.pdf"=>test:resolve()
|
let $pdf:=test:pdf("samples.pdf/BaseX100.pdf")
|
||||||
let $outline:=pdfbox:open($PDF)=>pdfbox:outline()
|
let $outline:=pdfbox:outline($pdf)
|
||||||
return unit:assert(empty($outline))
|
return unit:assert(empty($outline))
|
||||||
};
|
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:outline-present(){
|
function test:outline-present(){
|
||||||
let $PDF:="samples.pdf/icelandic-dictionary.pdf"=>test:resolve()
|
let $pdf:=test:pdf("samples.pdf/icelandic-dictionary.pdf")
|
||||||
let $outline:=pdfbox:open($PDF)=>pdfbox:outline()
|
let $outline:=pdfbox:outline($pdf)
|
||||||
return unit:assert(exists($outline))
|
return unit:assert(exists($outline))
|
||||||
};
|
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:outline-xml(){
|
function test:outline-xml(){
|
||||||
let $PDF:="samples.pdf/icelandic-dictionary.pdf"=>test:resolve()
|
let $pdf:=test:pdf("samples.pdf/icelandic-dictionary.pdf")
|
||||||
let $outline:=pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()
|
let $outline:=pdfbox:outline($pdf)=>pdfbox:outline-xml()
|
||||||
return unit:assert-equals(count($outline/bookmark),31)
|
return unit:assert-equals(count($outline/bookmark),31)
|
||||||
};
|
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:pagelabels(){
|
function test:pagelabels(){
|
||||||
let $PDF:="samples.pdf/BaseX100.pdf"=>test:resolve()
|
let $pdf:=test:pdf("samples.pdf/BaseX100.pdf")
|
||||||
let $labels:=pdfbox:open($PDF)=>pdfbox:pageLabels()
|
|
||||||
|
let $labels:=pdfbox:labels($pdf)
|
||||||
return (
|
return (
|
||||||
unit:assert($labels[1]="i") ,
|
unit:assert($labels[1]="i") ,
|
||||||
unit:assert($labels[27]="1")
|
unit:assert($labels[27]="1")
|
||||||
|
@ -47,20 +60,21 @@ function test:pagelabels(){
|
||||||
};
|
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:save(){
|
function test:extract-save(){
|
||||||
|
let $pdf:=test:pdf("samples.pdf/BaseX100.pdf")
|
||||||
let $dest:=file:create-temp-file("test",".pdf")=>trace("DEST: ")
|
let $dest:=file:create-temp-file("test",".pdf")=>trace("DEST: ")
|
||||||
let $PDF:="samples.pdf/BaseX100.pdf"=>test:resolve()
|
let $outline:=pdfbox:extract($pdf,2,12,$dest)
|
||||||
let $outline:=pdfbox:open($PDF)=>pdfbox:extract(2,12,$dest)
|
|
||||||
return unit:assert(true())
|
return unit:assert(true())
|
||||||
};
|
};
|
||||||
|
|
||||||
declare %unit:test
|
declare %unit:test
|
||||||
function test:page-text(){
|
function test:page-text(){
|
||||||
let $PDF:="samples.pdf/BaseX100.pdf"=>test:resolve()
|
let $pdf:=test:pdf("samples.pdf/BaseX100.pdf")
|
||||||
let $text:=pdfbox:open($PDF)=>pdfbox:getText(1)
|
let $text:=pdfbox:page-text($pdf,1)
|
||||||
return unit:assert(starts-with($text,"BaseX Documentation"))
|
return unit:assert(starts-with($text,"BaseX Documentation"))
|
||||||
};
|
};
|
||||||
|
|
||||||
declare function test:resolve($file as xs:string){
|
declare function test:pdf($file as xs:string)
|
||||||
file:resolve-path($file,$test:base)
|
as item(){
|
||||||
|
file:resolve-path($file,$test:base)=>pdfbox:open()
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue