This commit is contained in:
parent
5c9e32d119
commit
9f0bed7cd8
8 changed files with 64 additions and 40 deletions
|
@ -38,7 +38,7 @@ jobs:
|
|||
|
||||
- name: Verify BaseX installation
|
||||
run: |
|
||||
basex -v
|
||||
basex -c "SHOW OPTIONS"
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
@ -47,7 +47,8 @@ jobs:
|
|||
|
||||
- name: Build package
|
||||
run: |
|
||||
basex scripts/install.bxs
|
||||
basex scripts/make-xar.xq
|
||||
basex scripts/repo-install.xq
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
|
|
|
@ -3,4 +3,4 @@
|
|||
c
|
||||
Xyxh
|
||||
4456
|
||||
9.7.4 xyxz
|
||||
9.7.4 xyxz0
|
3
.github/workflows/ci-basex.yaml
vendored
3
.github/workflows/ci-basex.yaml
vendored
|
@ -35,7 +35,8 @@ jobs:
|
|||
|
||||
- name: Build package
|
||||
run: |
|
||||
basex scripts/install.bxs
|
||||
basex scripts/make-xar.xq
|
||||
basex scripts/repo-install.xq
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
|
|
3
changelog.md
Normal file
3
changelog.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
## 0.1.5 2025-02-10
|
||||
* Add `isEncrypted`
|
||||
* Rename `open` to `open-file`
|
17
package.xml
17
package.xml
|
@ -1,17 +0,0 @@
|
|||
<package name="org.expkg_zone58.Pdfbox3"
|
||||
abbrev="pdfbox"
|
||||
version="0.1.1"
|
||||
spec="1.0">
|
||||
<component name="pdfbox-3.0.4.jar">
|
||||
<source type="maven">org/apache/pdfbox/pdfbox/3.0.4/pdfbox-3.0.4.jar</source>
|
||||
</component>
|
||||
<component name="pdfbox-io-3.0.4.jar">
|
||||
<source type="maven">org/apache/pdfbox/pdfbox-io/3.0.4/pdfbox-io-3.0.4.jar</source>
|
||||
</component>
|
||||
<component name="fontbox-3.0.4.jar">
|
||||
<source type="maven">org/apache/pdfbox/fontbox/3.0.4/fontbox-3.0.4.jar</source>
|
||||
</component>
|
||||
<component name="commons-logging-1.3.4.jar">
|
||||
<source type="maven">commons-logging/commons-logging/1.3.4/commons-logging-1.3.4.jar</source>
|
||||
</component>
|
||||
</package>
|
39
readme.md
Normal file
39
readme.md
Normal file
|
@ -0,0 +1,39 @@
|
|||
# Pdfbox
|
||||
A BaseX interface for [Pdfbox](https://pdfbox.apache.org/) version 3.
|
||||
It is packaged using the [EXPath](https://docs.basex.org/main/Repository#expath_packaging) format, and is tested against BaseX 10.7 and 11.7. Note: the current release (v0.1.5) also works on BaseX 9.7.
|
||||
|
||||
* The Pdfbox 3 [FAQ](https://pdfbox.apache.org/3.0/faq.html) may be useful.
|
||||
## Features
|
||||
|
||||
The features focus on extracting information from PDFs rather than creation or editing.
|
||||
|
||||
* read PDF page count.
|
||||
* read any PDF outline and return as map(s) or XML.
|
||||
* read page labels.
|
||||
* read page text.
|
||||
* save PDF page range to a new PDF.
|
||||
* save image of rendered PDF page.
|
||||
|
||||
|
||||
|
||||
# Install
|
||||
Pre-built `pdfbox-x.y.z.xar` files are available on the releases page. They can be installed using the standard repository functions or using the GUI.
|
||||
|
||||
# Usage
|
||||
```xquery
|
||||
import module namespace pdfbox="org.expkg_zone58.Pdfbox3";
|
||||
|
||||
pdfbox:with-pdf("...path/to/pdf.pdf",
|
||||
function($pdf){
|
||||
(1 to pdfbox:page-count($pdf))!pdfbox:page-text($pdf,.)
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## Build
|
||||
|
||||
* `scripts/make-xar.xq` packages the required `jar`s and `xqm` files to a `xar` file in the `dist` folder.
|
||||
|
||||
### Action support
|
||||
|
||||
The workflow `ci-basex.yaml` builds and tests the package. This can be used as an action on [GitHub](https://github.com/features/actions), or on a local [Gitea](https://docs.gitea.com/usage/actions/overview) installation.
|
|
@ -1,8 +0,0 @@
|
|||
# run query
|
||||
XQUERY "make xar.."
|
||||
RUN make-xar.xq
|
||||
XQUERY "Repo install.."
|
||||
RUN repo-install.xq
|
||||
REPO LIST
|
||||
|
||||
|
|
@ -28,14 +28,7 @@ declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
|
|||
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
|
||||
declare namespace File ="java:java.io.File";
|
||||
|
||||
declare variable $pdfbox:package-version:="0.1.2";
|
||||
|
||||
(:~ version of Apache Pdfbox in use e.g. "3.0.4"
|
||||
:)
|
||||
declare function pdfbox:version()
|
||||
as xs:string{
|
||||
Q{java:org.apache.pdfbox.util.Version}getVersion()
|
||||
};
|
||||
|
||||
(:~ with-document pattern: open pdf, apply function, close pdf
|
||||
creates a local pdfobject and ensures it is closed after use
|
||||
|
@ -44,7 +37,7 @@ e.g pdfbox:with-pdf("path...",pdfbox:page-text(?,5))
|
|||
declare function pdfbox:with-pdf($src as xs:string,
|
||||
$fn as function(item())as item()*)
|
||||
as item()*{
|
||||
let $pdf:=pdfbox:open($src)
|
||||
let $pdf:=pdfbox:open-file($src)
|
||||
return try{
|
||||
$fn($pdf),pdfbox:close($pdf)
|
||||
} catch *{
|
||||
|
@ -54,12 +47,12 @@ as item()*{
|
|||
};
|
||||
|
||||
(:~ open pdf, returns pdf object :)
|
||||
declare function pdfbox:open($pdfpath as xs:string)
|
||||
declare function pdfbox:open-file($pdfpath as xs:string)
|
||||
as item(){
|
||||
try{
|
||||
Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
|
||||
} catch *{
|
||||
error(xs:QName("pdfbox:open"),"Failed to open: " || $pdfpath)
|
||||
error(xs:QName("pdfbox:open-file"),"Failed to open: " || $pdfpath)
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -122,7 +115,7 @@ as map(*){
|
|||
(:~ summary info as map for $pdfpath :)
|
||||
declare function pdfbox:report($pdfpath as xs:string)
|
||||
as map(*){
|
||||
let $pdf:=pdfbox:open($pdfpath)
|
||||
let $pdf:=pdfbox:open-file($pdfpath)
|
||||
return (map{
|
||||
"file": $pdfpath,
|
||||
"pages": pdfbox:page-count($pdf),
|
||||
|
@ -144,6 +137,12 @@ as xs:boolean{
|
|||
}
|
||||
};
|
||||
|
||||
(:~ true if $pdf is encrypted :)
|
||||
declare function pdfbox:isEncrypted($pdf as item())
|
||||
as xs:boolean{
|
||||
PDDocument:isEncrypted($pdf)
|
||||
};
|
||||
|
||||
(:~ outline for $pdf as map()* :)
|
||||
declare function pdfbox:outline($pdf as item())
|
||||
as map(*)*{
|
||||
|
@ -268,6 +267,12 @@ as xs:string{
|
|||
return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
|
||||
};
|
||||
|
||||
(:~ version of Apache Pdfbox in use e.g. "3.0.4" :)
|
||||
declare function pdfbox:version()
|
||||
as xs:string{
|
||||
Q{java:org.apache.pdfbox.util.Version}getVersion()
|
||||
};
|
||||
|
||||
(:~ convert date :)
|
||||
declare %private
|
||||
function pdfbox:gregToISO($item as item())
|
||||
|
|
Loading…
Add table
Reference in a new issue