Compare commits
3 commits
301303b4d6
...
ed84a3b342
Author | SHA1 | Date | |
---|---|---|---|
ed84a3b342 | |||
a3bef9d6f4 | |||
196ffa7526 |
11 changed files with 145 additions and 81 deletions
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
|
@ -1,5 +1,6 @@
|
|||
{
|
||||
"basexTools.xquery.profile": "basex-10",
|
||||
"basexTools.xquery.showHovers": false,
|
||||
"basexTools.xquery.executionDefault": "basexclient",
|
||||
|
||||
}
|
2
.xqdoca
2
.xqdoca
|
@ -1,4 +1,4 @@
|
|||
<xqdoca xmlns="urn:quodatum:xqdoca" version="1.0">
|
||||
<source>jars/</source>
|
||||
<source>src/</source>
|
||||
<target>docs/xqdoc/</target>
|
||||
</xqdoca>
|
|
@ -1,3 +1,9 @@
|
|||
# 0.3.6 2025-05-31
|
||||
* Add metadata function
|
||||
* rename page-size->page-media-box
|
||||
# 0.3.1 2025-05-28
|
||||
* update Apache pdfbox to 3.0.5
|
||||
* API name changes e.g. page-count->number-of-pages
|
||||
# 0.2.7 2025-02-18
|
||||
* reduce memory use
|
||||
* add open from xs:base64Binary
|
||||
|
|
12
doc.md
12
doc.md
|
@ -78,10 +78,10 @@ let $text := pdfbox:page-text($pdf, 1) (: Extract text from page 1 :)
|
|||
---
|
||||
|
||||
### Rendering a Page as an Image
|
||||
You can render a PDF page as an image using the `pdfbox:page-image` function. Supported formats include `jpg`, `png`, `bmp`, and `gif`.
|
||||
You can render a PDF page as an image using the `pdfbox:page-render` function. Supported formats include `jpg`, `png`, `bmp`, and `gif`.
|
||||
|
||||
```xquery
|
||||
let $image := pdfbox:page-image($pdf, 1, map{"format": "png", "scale": 2})
|
||||
let $image := pdfbox:page-render($pdf, 1, map{"format": "png", "scale": 2})
|
||||
```
|
||||
|
||||
- `format`: The image format (default is `jpg`).
|
||||
|
@ -90,10 +90,10 @@ let $image := pdfbox:page-image($pdf, 1, map{"format": "png", "scale": 2})
|
|||
---
|
||||
|
||||
### Extracting a Range of Pages
|
||||
To extract a range of pages from a PDF, use the `pdfbox:extract` function.
|
||||
To extract a range of pages from a PDF, use the `pdfbox:extract-range` function.
|
||||
|
||||
```xquery
|
||||
let $extracted := pdfbox:extract($pdf, 1, 3) (: Extract pages 1 to 3 :)
|
||||
let $extracted := pdfbox:extract-range($pdf, 1, 3) (: Extract pages 1 to 3 :)
|
||||
```
|
||||
|
||||
The result is a new PDF document in binary format.
|
||||
|
@ -162,10 +162,10 @@ let $labels := pdfbox:labels($pdf)
|
|||
---
|
||||
|
||||
### Getting Page Size
|
||||
To get the size of a specific page, use the `pdfbox:page-size` function.
|
||||
To get the size of a specific page, use the `pdfbox:page-media-box` function.
|
||||
|
||||
```xquery
|
||||
let $size := pdfbox:page-size($pdf, 1) (: Get size of page 1 :)
|
||||
let $size := pdfbox:page-media-box($pdf, 1) (: Get media box of the page at zero-based index 1 :)
|
||||
```
|
||||
|
||||
---
|
||||
|
|
1
docs/pdf.xqbk
Normal file
1
docs/pdf.xqbk
Normal file
|
@ -0,0 +1 @@
|
|||
{"cells":[{"kind":2,"language":"xquery","value":"import module namespace pdfbox=\"org.expkg_zone58.Pdfbox3\";\r\nlet $a:=pdfbox:open(\"C:\\Users\\mrwhe\\git\\expkg-zone58\\pdfbox\\data\\1e\\gpg-book\\2-5-1\\B4541C-TRD\\255894---Book_File-Web_PDF_9798400668005_486272.pdf\")\r\nreturn pdfbox:labels($a)"}]}
|
19
package.json
19
package.json
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "pdfbox",
|
||||
"version": "0.2.7",
|
||||
"version": "0.3.6",
|
||||
"description": "A BaseX interface to Apache Pdfbox version 3",
|
||||
"main": "src/Pdfbox3.xqm",
|
||||
"homepage": "https://github.com/expkg-zone58/pdfbox#readme",
|
||||
|
@ -8,8 +8,9 @@
|
|||
"doc": "docs"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "%BASEX10%/bin/basex -Wt tests",
|
||||
"docs": "xqdoca"
|
||||
"test": "%BASEX10%/bin/basex -Wt tests",
|
||||
"docs": "xqdoca",
|
||||
"build": "%BASEX10%/bin/basex scripts/make-xar.xq"
|
||||
},
|
||||
"keywords": [
|
||||
"pdf",
|
||||
|
@ -22,11 +23,13 @@
|
|||
"expkg_zone58": {
|
||||
"namespace": "org.expkg_zone58.Pdfbox3",
|
||||
"main-class": "org.apache.pdfbox.pdmodel.PDDocument",
|
||||
"maven": [
|
||||
"org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar",
|
||||
"org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar",
|
||||
"org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar",
|
||||
"commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar"
|
||||
"manifest-jar" :"pdfbox-3.0.5.jar",
|
||||
"output" : "dist/pdfbox-3.0.5.fat.jar",
|
||||
"maven2": [
|
||||
"org.apache.pdfbox:pdfbox:3.0.5",
|
||||
"org.apache.pdfbox:pdfbox-io:3.0.5",
|
||||
"org.apache.pdfbox:fontbox:3.0.5",
|
||||
"commons-logging:commons-logging:1.3.5"
|
||||
]
|
||||
|
||||
}
|
||||
|
|
|
@ -20,12 +20,12 @@ The features focus on extracting information from PDFs rather than creation or e
|
|||
* save pdf page range to a new pdf.
|
||||
* save image of rendered pdf page.
|
||||
* open PDF with password
|
||||
* read XMP metadata
|
||||
* Page size information
|
||||
* support for xs:base64Binary in function inputs and outputs to facilitate database and store usage.
|
||||
|
||||
### Not supported:
|
||||
* creating completely new PDFs
|
||||
* Page size information
|
||||
* XMP processing
|
||||
* creating PDFs with new content
|
||||
* Form processing
|
||||
|
||||
## Documentation
|
||||
|
@ -41,7 +41,7 @@ import module namespace pdfbox="org.expkg_zone58.Pdfbox3";
|
|||
|
||||
pdfbox:with-pdf("...path/to/pdf.pdf",
|
||||
function($pdf){
|
||||
(1 to pdfbox:page-count($pdf))!pdfbox:page-text($pdf,.)
|
||||
(1 to pdfbox:number-of-pages($pdf))!pdfbox:page-text($pdf,.)
|
||||
}
|
||||
)
|
||||
```
|
||||
|
|
|
@ -9,6 +9,8 @@ declare namespace pkg='http://expath.org/ns/pkg';
|
|||
declare variable $build:archive-opts:= map { "format" : "zip", "algorithm" : "deflate" };
|
||||
|
||||
declare variable $build:base:= file:resolve-path("../",static-base-uri())=>trace("base ");
|
||||
|
||||
(:~ load "npm style" package.json :)
|
||||
declare variable $build:PKG:=json:doc(file:resolve-path("package.json",$build:base),map{"format":"xquery"});
|
||||
|
||||
(:~ return binary for fat jar from jars in $input-dir
|
||||
|
@ -94,7 +96,7 @@ as xs:string{
|
|||
|
||||
declare function build:xar-create()
|
||||
as xs:base64Binary{
|
||||
let $_:=build:maven-download($build:PKG?expkg_zone58?maven=>array:flatten(),$build:base || "jars/")
|
||||
let $_:=build:maven-download($build:PKG?expkg_zone58?maven2=>array:flatten(),$build:base || "jars/")
|
||||
let $entries:=
|
||||
build:xar-add(map{},build:jars("content"),build:jars("download")!build:content(.))
|
||||
=>build:xar-add("content/Pdfbox3.xqm",build:content("src/Pdfbox3.xqm"))
|
||||
|
@ -124,27 +126,45 @@ as xs:string{
|
|||
|
||||
declare function build:jars($style as xs:string)
|
||||
as xs:string*{
|
||||
let $src:=$build:PKG?expkg_zone58?maven=>array:flatten()
|
||||
let $names:= $src!replace(.,"^.*/","")
|
||||
let $artifacts:=$build:PKG?expkg_zone58?maven2=>array:flatten()
|
||||
let $names:= $artifacts!build:maven-slug(.)!file:name(.)
|
||||
return switch($style)
|
||||
case "name" return $names
|
||||
case "download" return $names!concat("jars/",.)
|
||||
case "content" return $names!concat("content/",.)
|
||||
default return $src
|
||||
default return $names
|
||||
};
|
||||
|
||||
(:~ download the jars for maven $artifacts from $build:REPO to $destdir, skipping files that already exist :)
|
||||
declare variable $build:REPO as xs:string external :="https://repo1.maven.org/maven2/";
|
||||
declare function build:maven-download($urls as xs:string*,$destdir as xs:string)
|
||||
|
||||
declare function build:maven-download($artifacts as xs:string*,$destdir as xs:string)
|
||||
as empty-sequence(){
|
||||
file:create-dir($destdir),
|
||||
for $f in $urls
|
||||
let $dest:=$destdir || replace($f,"^.*/","")
|
||||
for $id in $artifacts
|
||||
let $slug:=build:maven-slug($id)
|
||||
let $dest:=$destdir || file:name($slug)
|
||||
where not(file:exists($dest))
|
||||
return build:write-binary($dest, fetch:binary(resolve-uri($f,$build:REPO)
|
||||
return build:write-binary($dest, fetch:binary(resolve-uri($slug,$build:REPO)
|
||||
=>trace("Download: ")))
|
||||
};
|
||||
|
||||
(:~ non-rooted url for maven artifact
 : @param $artifact maven coordinate in the form 'groupId:id:version'
 : @return relative repository path, e.g. "org.apache.pdfbox:pdfbox:3.0.5"
 :         gives "org/apache/pdfbox/pdfbox/3.0.5/pdfbox-3.0.5.jar"
 : @error build:maven-slug if $artifact is not exactly three non-empty,
 :        colon separated parts
 :)
declare function build:maven-slug($artifact as xs:string)
as xs:string{
  (: The pattern is anchored: fn:matches succeeds on any substring, so an
     unanchored test would accept "g:a:v:classifier" and the 4th token
     would then be silently ignored below. :)
  let $parts:=if(matches($artifact,'^[^:]+:[^:]+:[^:]+$'))
              then tokenize($artifact,":")
              else error(xs:QName('build:maven-slug'),"invalid format required 'groupId:id:version'")
  return (
    (: groupId dots become directory separators :)
    translate($parts[1],".","/"),
    $parts[2],
    $parts[3],
    (: file name is <id>-<version>.jar :)
    concat($parts[2], "-", $parts[3], ".jar")
  )=>string-join("/")
};
|
||||
|
||||
(:~ write-binary, creating dir if required :)
|
||||
declare function build:write-binary($dest as xs:string,$contents as xs:base64Binary?)
|
||||
as empty-sequence(){
|
||||
|
|
|
@ -3,29 +3,18 @@ import module namespace build = 'urn:quodatum:build1' at 'build.xqm';
|
|||
|
||||
declare variable $base:= file:resolve-path("../",static-base-uri())=>trace("base ");
|
||||
|
||||
declare variable $maven-urls := (
|
||||
"org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar",
|
||||
"org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar",
|
||||
"org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar",
|
||||
"commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar"
|
||||
);
|
||||
|
||||
let $config :=map {
|
||||
"manifest-jar" : "pdfbox-3.0.4.jar",
|
||||
"input-dir" : "jars/",
|
||||
"output" : "dist/pdfbox-3.0.4.fat.jar",
|
||||
"main-class": "org.expkg_zone58.Pdfbox3"
|
||||
}
|
||||
|
||||
let $jar-path:=file:resolve-path($config?input-dir,$base)=>trace("jar: ")
|
||||
let $_:=build:maven-download($maven-urls,$jar-path)
|
||||
let $fat-jar := build:fatjar-from-folder($jar-path,$config?manifest-jar)
|
||||
let $jar-path:=$build:base || "jars/"=>trace("jar: ")
|
||||
let $_:=build:maven-download($build:PKG?expkg_zone58?maven2=>array:flatten(),
|
||||
$build:base || "jars/")
|
||||
|
||||
let $fat-jar:=build:update-manifest($fat-jar, $config?main-class)
|
||||
let $name:=replace($config?main-class,"\.","/") || ".xqm"
|
||||
let $fat-jar := build:fatjar-from-folder($jar-path,$build:PKG?expkg_zone58?manifest-jar)
|
||||
|
||||
let $fat-jar:=build:update-manifest($fat-jar, $build:PKG?expkg_zone58?main-class)
|
||||
let $name:=replace($build:PKG?expkg_zone58?main-class,"\.","/") || ".xqm"
|
||||
let $content:=file:read-binary($base || "src/Pdfbox3.xqm")
|
||||
let $fat-jar:=archive:update($fat-jar, $name,$content)
|
||||
let $output-file := file:resolve-path($config?output,$base)
|
||||
let $output-file := file:resolve-path($build:PKG?expkg_zone58?output,$base)
|
||||
return (build:write-binary($output-file, $fat-jar),
|
||||
trace($output-file,"fat jar: "))
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
xquery version '3.1';
|
||||
(:~
|
||||
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
|
||||
A BaseX 10.7+ interface to pdfbox 3.0 https://pdfbox.apache.org/ ,
|
||||
requires pdfbox jars on classpath, i.e. in custom or xar
|
||||
tested with pdfbox-app-3.0.4.jar
|
||||
@see download https://pdfbox.apache.org/download.cgi
|
||||
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.4/
|
||||
tested with pdfbox-app-3.0.5.jar
|
||||
@see https://pdfbox.apache.org/download.cgi
|
||||
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.5/
|
||||
@author Andy Bunce 2025
|
||||
:)
|
||||
|
||||
|
@ -16,12 +16,18 @@ declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument";
|
|||
declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog";
|
||||
declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels";
|
||||
declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor";
|
||||
declare namespace PDPage ="org.apache.pdfbox.pdmodel.PDPage";
|
||||
declare namespace PDPage ="java:org.apache.pdfbox.pdmodel.PDPage";
|
||||
declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree";
|
||||
declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline";
|
||||
declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation";
|
||||
declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem";
|
||||
declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
|
||||
declare namespace PDMetadata="java:org.apache.pdfbox.pdmodel.common.PDMetadata";
|
||||
declare namespace COSInputStream="java:org.apache.pdfbox.cos.COSInputStream";
|
||||
|
||||
declare namespace rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
||||
|
||||
|
||||
declare namespace RandomAccessReadBuffer="java:org.apache.pdfbox.io.RandomAccessReadBuffer";
|
||||
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
|
||||
declare namespace PDRectangle="org.apache.pdfbox.pdmodel.common.PDRectangle";
|
||||
|
@ -54,8 +60,8 @@ pdfbox:open($pdfsrc, map{})
|
|||
};
|
||||
|
||||
(:~ open pdf from file/url/binary, opts may have password , returns pdf object
|
||||
@param $pdfsrc a fetchable url or a xs:base64Binary
|
||||
@param $opts map {"password":}
|
||||
@param $pdfsrc a fetchable url or filepath, or xs:base64Binary item
|
||||
@param $opts options, optionally with map {"password":}
|
||||
:)
|
||||
declare function pdfbox:open($pdfsrc as item(), $opts as map(*))
|
||||
as item(){
|
||||
|
@ -75,7 +81,7 @@ as item(){
|
|||
}
|
||||
};
|
||||
|
||||
(:~ the version of the PDF specification used by $pdf e.g "1.4"
|
||||
(:~ The version of the PDF specification used by $pdf e.g "1.4"
|
||||
returned as string to avoid float rounding issues
|
||||
:)
|
||||
declare function pdfbox:specification($pdf as item())
|
||||
|
@ -83,13 +89,13 @@ as xs:string{
|
|||
PDDocument:getVersion($pdf)=>xs:decimal()=>round(4)=>string()
|
||||
};
|
||||
|
||||
(:~ save pdf $pdf to filesystem at $savepath , returns $savepath :)
|
||||
(:~ Save pdf $pdf to filesystem at $savepath , returns $savepath :)
|
||||
declare function pdfbox:save($pdf as item(),$savepath as xs:string)
|
||||
as xs:string{
|
||||
PDDocument:save($pdf, File:new($savepath)),$savepath
|
||||
};
|
||||
|
||||
(:~ $pdf as xs:base64Binary :)
|
||||
(:~ Create binary representation of $pdf object as xs:base64Binary :)
|
||||
declare function pdfbox:binary($pdf as item())
|
||||
as xs:base64Binary{
|
||||
let $bytes:=Q{java:java.io.ByteArrayOutputStream}new()
|
||||
|
@ -98,7 +104,7 @@ as xs:base64Binary{
|
|||
=>convert:integers-to-base64()
|
||||
};
|
||||
|
||||
(: release references to $pdf:)
|
||||
(:~ Release any resources related to $pdf:)
|
||||
declare function pdfbox:close($pdf as item())
|
||||
as empty-sequence(){
|
||||
(# db:wrapjava void #) {
|
||||
|
@ -106,15 +112,15 @@ as empty-sequence(){
|
|||
}
|
||||
};
|
||||
|
||||
(:~ number of pages in PDF:)
|
||||
declare function pdfbox:page-count($pdf as item())
|
||||
(:~ Number of pages in PDF:)
|
||||
declare function pdfbox:number-of-pages($pdf as item())
|
||||
as xs:integer{
|
||||
PDDocument:getNumberOfPages($pdf)
|
||||
};
|
||||
|
||||
(:~ render of $pdf page to image
|
||||
(:~ Pdf page as image (zero is cover)
|
||||
options.format="bmp jpg png gif" etc (default "jpg"), options.scale: 1 = 72 dpi (default 1) :)
|
||||
declare function pdfbox:page-image($pdf as item(),$pageNo as xs:integer,$options as map(*))
|
||||
declare function pdfbox:page-render($pdf as item(),$pageNo as xs:integer,$options as map(*))
|
||||
as xs:base64Binary{
|
||||
let $options:=map:merge(($options,map{"format":"jpg","scale":1}))
|
||||
let $bufferedImage:=PDFRenderer:new($pdf)=>PDFRenderer:renderImage($pageNo,$options?scale)
|
||||
|
@ -125,12 +131,13 @@ as xs:base64Binary{
|
|||
|
||||
};
|
||||
|
||||
|
||||
(:~ property access map
|
||||
keys are property names,
|
||||
values are sequences of functions to get property from $pdf object
|
||||
:)
|
||||
declare %private variable $pdfbox:property-map:=map{
|
||||
"pageCount": pdfbox:page-count#1,
|
||||
"pageCount": pdfbox:number-of-pages#1,
|
||||
|
||||
"hasOutline": pdfbox:hasOutline#1,
|
||||
|
||||
|
@ -166,7 +173,7 @@ declare %private variable $pdfbox:property-map:=map{
|
|||
};
|
||||
|
||||
(:~ known property names sorted :)
|
||||
declare function pdfbox:defined-properties()
|
||||
declare function pdfbox:property-names()
|
||||
as xs:string*{
|
||||
$pdfbox:property-map=>map:keys()=>sort()
|
||||
};
|
||||
|
@ -233,6 +240,37 @@ as xs:boolean{
|
|||
=>exists()
|
||||
};
|
||||
|
||||
(:~ XMP metadata as "RDF" document
|
||||
@param $pdf pdf object, as returned by pdfbox:open
@return the XMP packet parsed as an XML document, or empty sequence when
        the document catalog has no metadata
@note usually rdf:RDF root, but sometimes x:xmpmeta
|
||||
:)
|
||||
declare function pdfbox:metadata($pdf as item())
|
||||
as document-node(element(*))?
|
||||
{
|
||||
let $m:=PDDocument:getDocumentCatalog($pdf)
|
||||
=>PDDocumentCatalog:getMetadata()
|
||||
return if(exists($m))
|
||||
then
|
||||
(: $is is the stream that pdfbox:read-stream reads from;
   NOTE(review): it is not explicitly closed in this function - confirm :)
let $is:=PDMetadata:exportXMPMetadata($m)
|
||||
(: keep reading blocks, appending to ?data, until a read reports n = -1 :)
return pdfbox:do-until(
|
||||
map{"n":0,"data":""},
|
||||
|
||||
function($input,$pos ) { pdfbox:read-stream($is,$input?data)},
|
||||
|
||||
function($output,$pos) { $output?n eq -1 }
|
||||
)?data=>parse-xml()
|
||||
else ()
|
||||
};
|
||||
|
||||
(:~ read next block from XMP stream
 @param $is stream to read from, via COSInputStream:read
 @param $read text accumulated from earlier blocks
 @return map{"n": value returned by read, "data": $read followed by the text of this block}
 NOTE(review): each block of up to 4096 bytes is converted to a string on its own,
 so a multi-byte character that spans two blocks may not decode correctly - confirm.
:)
|
||||
declare %private function pdfbox:read-stream($is,$read as xs:string)
|
||||
as map(*){
|
||||
let $blen:=4096
|
||||
(: build a $blen long byte buffer by extending a one element array :)
let $buff:=Q{java:java.util.Arrays}copyOf(array{xs:byte(0)},$blen)
|
||||
let $n:= COSInputStream:read($is,$buff,xs:int(0),xs:int($blen))
|
||||
(: only the first $n entries of the buffer are decoded :)
let $data:=convert:integers-to-base64(subsequence($buff,1,$n))=>convert:binary-to-string()
|
||||
return map{"n":$n, "data": $read || $data}
|
||||
};
|
||||
|
||||
(:~ outline for $pdf as map()* :)
|
||||
declare function pdfbox:outline($pdf as item())
|
||||
as map(*)*{
|
||||
|
@ -275,7 +313,7 @@ as map(*){
|
|||
)
|
||||
};
|
||||
|
||||
(:~ outline as xml :)
|
||||
(:~ PDF outline in xml format :)
|
||||
declare function pdfbox:outline-xml($pdf as item())
|
||||
as element(outline)?{
|
||||
let $outline:=pdfbox:outline($pdf)
|
||||
|
@ -284,6 +322,7 @@ as element(outline)?{
|
|||
else ()
|
||||
};
|
||||
|
||||
(:~ recursive outline map to XML :)
|
||||
declare %private function pdfbox:bookmark-xml($outline as map(*)*)
|
||||
as element(bookmark)*
|
||||
{
|
||||
|
@ -293,8 +332,8 @@ as element(bookmark)*
|
|||
</bookmark>
|
||||
};
|
||||
|
||||
(:~ return bookmark info for children of $outlineItem
|
||||
@return map like{index:,title:,hasChildren:}
|
||||
(:~ return bookmark info for $bookmark
|
||||
@return map{index:..,title:..,hasChildren:..}
|
||||
:)
|
||||
declare %private function pdfbox:bookmark($bookmark as item(),$pdf as item())
|
||||
as map(*)
|
||||
|
@ -320,8 +359,11 @@ as item()?
|
|||
=>PDPageTree:indexOf($page)
|
||||
};
|
||||
|
||||
(:~ new PDF doc from 1 based page range as xs:base64Binary :)
|
||||
declare function pdfbox:extract($pdf as item(),
|
||||
(:~ Return new PDF doc with pages from $start to $end as xs:base64Binary, (1 based)
|
||||
@param $start first page to include
|
||||
@param $end last page to include
|
||||
:)
|
||||
declare function pdfbox:extract-range($pdf as item(),
|
||||
$start as xs:integer,$end as xs:integer)
|
||||
as xs:base64Binary
|
||||
{
|
||||
|
@ -355,15 +397,17 @@ as xs:string{
|
|||
return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$pdf)}
|
||||
};
|
||||
|
||||
(:~ return size of $pageNo zero based :)
|
||||
declare function pdfbox:page-size($pdf as item(), $pageNo as xs:integer)
|
||||
(:~ return size of $pageNo (zero based)
|
||||
@return media box as a string, e.g. [0.0,0.0,168.0,239.52]
|
||||
:)
|
||||
declare function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer)
|
||||
as xs:string{
|
||||
PDDocument:getPage($pdf, $pageNo)
|
||||
=>PDPage:getMediaBox()
|
||||
=>PDRectangle:toString()
|
||||
};
|
||||
|
||||
(:~ version of Apache Pdfbox in use e.g. "3.0.4" :)
|
||||
(:~ version of Apache Pdfbox in use e.g. "3.0.4" :)
|
||||
declare function pdfbox:version()
|
||||
as xs:string{
|
||||
Q{java:org.apache.pdfbox.util.Version}getVersion()
|
||||
|
@ -379,7 +423,7 @@ as xs:string?{
|
|||
};
|
||||
|
||||
(:~ fn:do-until shim for BaseX 9+10
|
||||
if fn:do-until not found use hof:until
|
||||
if fn:do-until not found use hof:until, note: $pos always zero
|
||||
:)
|
||||
declare %private function pdfbox:do-until(
|
||||
$input as item()*,
|
||||
|
|
|
@ -9,7 +9,7 @@ declare variable $test:base:=file:base-dir()=>file:parent();
|
|||
declare %unit:test
|
||||
function test:pdfbox-version(){
|
||||
let $v:= pdfbox:version()=>trace("VER: ")
|
||||
return unit:assert-equals($v,"3.0.4")
|
||||
return unit:assert-equals($v,"3.0.5")
|
||||
};
|
||||
|
||||
declare %unit:test
|
||||
|
@ -22,7 +22,7 @@ function test:specification(){
|
|||
declare %unit:test
|
||||
function test:page-count(){
|
||||
let $pdf:=test:open("samples.pdf/BaseX100.pdf")
|
||||
let $pages:=pdfbox:page-count($pdf)
|
||||
let $pages:=pdfbox:number-of-pages($pdf)
|
||||
return unit:assert-equals($pages,521)
|
||||
};
|
||||
|
||||
|
@ -53,7 +53,7 @@ function test:labels(){
|
|||
|
||||
let $labels:=pdfbox:labels($pdf)
|
||||
return (
|
||||
unit:assert-equals(count($labels),pdfbox:page-count($pdf)),
|
||||
unit:assert-equals(count($labels),pdfbox:number-of-pages($pdf)),
|
||||
unit:assert($labels[1]="i") ,
|
||||
unit:assert($labels[27]="1")
|
||||
)
|
||||
|
@ -63,7 +63,7 @@ declare %unit:test
|
|||
function test:extract(){
|
||||
let $pdf:=test:open("samples.pdf/BaseX100.pdf")
|
||||
let $dest:=file:create-temp-file("test",".pdf")=>trace("DEST: ")
|
||||
let $bin:=pdfbox:extract($pdf,2,12)
|
||||
let $bin:=pdfbox:extract-range($pdf,2,12)
|
||||
return unit:assert(true())
|
||||
};
|
||||
|
||||
|
@ -77,7 +77,7 @@ let $pdf:=test:open("samples.pdf/BaseX100.pdf")
|
|||
declare %unit:test
|
||||
function test:page-image(){
|
||||
let $pdf:=test:open("samples.pdf/BaseX100.pdf")
|
||||
let $image:=pdfbox:page-image($pdf,0,map{})
|
||||
let $image:=pdfbox:page-render($pdf,0,map{})
|
||||
return unit:assert(true())
|
||||
};
|
||||
|
||||
|
@ -94,7 +94,7 @@ declare %unit:test
|
|||
function test:with-url(){
|
||||
let $url:="https://files.basex.org/publications/Gath%20et%20al.%20%5b2009%5d,%20INEX%20Efficiency%20Track%20meets%20XQuery%20Full%20Text%20in%20BaseX.pdf"
|
||||
|
||||
let $count:=pdfbox:with-pdf($url,pdfbox:page-count#1)
|
||||
let $count:=pdfbox:with-pdf($url,pdfbox:number-of-pages#1)
|
||||
return unit:assert-equals($count,6)
|
||||
};
|
||||
|
||||
|
@ -141,13 +141,13 @@ function test:property(){
|
|||
declare %unit:test("expected", "pdfbox:property")
|
||||
function test:property-bad(){
|
||||
let $pdf:=test:open("samples.pdf/BaseX100.pdf")
|
||||
let $title:=pdfbox:property($pdf, "totle")
|
||||
let $title:=pdfbox:property($pdf, "badname")
|
||||
return unit:assert(exists($title))
|
||||
};
|
||||
(:~ Test for pdfbox:defined-properties function :)
|
||||
(:~ Test for pdfbox:property-names function :)
|
||||
declare %unit:test
|
||||
function test:defined-properties(){
|
||||
let $properties:=pdfbox:defined-properties()
|
||||
let $properties:=pdfbox:property-names()
|
||||
return unit:assert(exists($properties))
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue