1
0
Fork 0

[mod] tidy
Some checks failed
Run BaseX Tests / test (push) Failing after 28s

This commit is contained in:
Andy Bunce 2025-02-10 17:17:30 +00:00
parent 5c9e32d119
commit 9f0bed7cd8
8 changed files with 64 additions and 40 deletions

View file

@ -38,7 +38,7 @@ jobs:
- name: Verify BaseX installation
run: |
basex -v
basex -c "SHOW OPTIONS"
- name: Checkout repository
uses: actions/checkout@v4
@ -47,7 +47,8 @@ jobs:
- name: Build package
run: |
basex scripts/install.bxs
basex scripts/make-xar.xq
basex scripts/repo-install.xq
- name: Run tests
run: |

View file

@ -3,4 +3,4 @@
c
Xyxh
4456
9.7.4 xyxz
9.7.4 xyxz0

View file

@ -35,7 +35,8 @@ jobs:
- name: Build package
run: |
basex scripts/install.bxs
basex scripts/make-xar.xq
basex scripts/repo-install.xq
- name: run tests
run: |

3
changelog.md Normal file
View file

@ -0,0 +1,3 @@
## 0.1.5 2025-02-10
* Add `isEncrypted`
* Rename `open` to `open-file`

View file

@ -1,17 +0,0 @@
<package name="org.expkg_zone58.Pdfbox3"
abbrev="pdfbox"
version="0.1.1"
spec="1.0">
<component name="pdfbox-3.0.4.jar">
<source type="maven">org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar</source>
</component>
<component name="pdfbox-io-3.0.4.jar">
<source type="maven">org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar</source>
</component>
<component name="fontbox-3.0.4.jar">
<source type="maven">org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar</source>
</component>
<component name="commons-logging-1.3.4.jar">
<source type="maven">commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar</source>
</component>
</package>

39
readme.md Normal file
View file

@ -0,0 +1,39 @@
# Pdfbox
A BaseX interface for [Pdfbox](https://pdfbox.apache.org/) version 3.
It is packaged using the [EXPath](https://docs.basex.org/main/Repository#expath_packaging) format and is tested against BaseX 10.7 and 11.7. Note: the current release (v0.1.5) also works on BaseX 9.7.
* The Pdfbox 3 [FAQ](https://pdfbox.apache.org/3.0/faq.html) may be useful.
## Features
The features focus on extracting information from PDFs rather than creation or editing.
* read PDF page count.
* read any PDF outline and return as map(s) or XML.
* read page labels.
* read page text.
* save a PDF page range to a new PDF.
* save an image of a rendered PDF page.
# Install
Pre-built `pdfbox-x.y.z.xar` files are available on the releases page. They can be installed using the standard repository functions or using the GUI.
# Usage
```xquery
import module namespace pdfbox="org.expkg_zone58.Pdfbox3";
pdfbox:with-pdf("...path/to/pdf.pdf",
function($pdf){
(1 to pdfbox:page-count($pdf))!pdfbox:page-text($pdf,.)
}
)
```
## Build
* `scripts/make-xar.xq` packages the required `jar`s and `xqm` files to a `xar` file in the `dist` folder.
### Action support
The workflow `ci-basex.yaml` builds and tests the package. It can be used as an action on [GitHub](https://github.com/features/actions), or on a local [Gitea](https://docs.gitea.com/usage/actions/overview) installation.

View file

@ -1,8 +0,0 @@
# run query
XQUERY "make xar.."
RUN make-xar.xq
XQUERY "Repo install.."
RUN repo-install.xq
REPO LIST

View file

@ -28,14 +28,7 @@ declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
declare namespace File ="java:java.io.File";
declare variable $pdfbox:package-version:="0.1.2";
(:~ Version of Apache Pdfbox in use, e.g. "3.0.4".
 : @return the string produced by the static Java call
 :         org.apache.pdfbox.util.Version.getVersion()
:)
declare function pdfbox:version()
as xs:string{
Q{java:org.apache.pdfbox.util.Version}getVersion()
};
(:~ with-document pattern: open pdf,apply function, close pdf
creates a local pdfobject and ensures it is closed after use
@ -44,7 +37,7 @@ e.g pdfbox:with-pdf("path...",pdfbox:page-text(?,5))
declare function pdfbox:with-pdf($src as xs:string,
$fn as function(item())as item()*)
as item()*{
let $pdf:=pdfbox:open($src)
let $pdf:=pdfbox:open-file($src)
return try{
$fn($pdf),pdfbox:close($pdf)
} catch *{
@ -54,12 +47,12 @@ as item()*{
};
(:~ open pdf, returns pdf object :)
declare function pdfbox:open($pdfpath as xs:string)
declare function pdfbox:open-file($pdfpath as xs:string)
as item(){
try{
Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
} catch *{
error(xs:QName("pdfbox:open"),"Failed to open: " || $pdfpath)
error(xs:QName("pdfbox:open-file"),"Failed to open: " || $pdfpath)
}
};
@ -122,7 +115,7 @@ as map(*){
(:~ summary info as map for $pdfpath :)
declare function pdfbox:report($pdfpath as xs:string)
as map(*){
let $pdf:=pdfbox:open($pdfpath)
let $pdf:=pdfbox:open-file($pdfpath)
return (map{
"file": $pdfpath,
"pages": pdfbox:page-count($pdf),
@ -144,6 +137,12 @@ as xs:boolean{
}
};
(:~ True if $pdf is encrypted.
 : @param $pdf item handed unchanged to PDDocument:isEncrypted; presumably the
 :        pdf object returned by pdfbox:open-file (TODO confirm against callers)
 : @return the xs:boolean result of PDDocument:isEncrypted($pdf)
 :)
declare function pdfbox:isEncrypted($pdf as item())
as xs:boolean{
PDDocument:isEncrypted($pdf)
};
(:~ outline for $pdf as map()* :)
declare function pdfbox:outline($pdf as item())
as map(*)*{
@ -268,6 +267,12 @@ as xs:string{
return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
};
(:~ Version of Apache Pdfbox in use, e.g. "3.0.4".
 : @return the string produced by the static Java call
 :         org.apache.pdfbox.util.Version.getVersion()
 :)
declare function pdfbox:version()
as xs:string{
Q{java:org.apache.pdfbox.util.Version}getVersion()
};
(:~ convert date :)
declare %private
function pdfbox:gregToISO($item as item())