1
0
pdfbox/xquery/scratch/pdfbox.xq
2024-02-26 16:27:01 +00:00

25 lines
1.1 KiB
Plaintext

(: PDFBOX experiments
:)
import module namespace pdfbox="urn:expkg-zone58:pdfbox:3" at "../lib/pdfbox-lib.xqm";
(:~ Sample PDF locations used by these experiments, keyed by a short nickname.
 :  Every value is a relative, Windows-style path.
 :)
declare variable $samples as map(xs:string, xs:string) := map {
  "climate":  "data\drop-01d\set\2-6-1\A5579C_1\271989---Book_File-Web_PDF_9798400627484_486728.pdf",
  "women":    "data\drop-01d\set\2-6-1\A6229C_1\257334---Book_File-Web_PDF_9798216172628_486742.pdf",
  "genocide": "data\drop1-pdf\GR2967-TRD\272791---Book_File-Web_PDF_9798400640216_486366.pdf",
  "world":    "data\drop-01c\gpg-book\2-6\A3506C-TRD\256186---Book_File-Web_PDF_9798216038955_486148.pdf",
  "dummy":    "lib\abc-clio-dummy.pdf"
};
(:~ Absolute directory on the author's machine, used as the base
 :  when the selected sample path is resolved.
 :)
declare variable $base as xs:string :=
  "C:\Users\mrwhe\git\bloomsbury\content-architecture\xquery\ABC-CLIO\data";
(:~ Path of the PDF under test: the "climate" entry of $samples resolved
 :  against $base. Pick another key of $samples to try a different file.
 :  NOTE(review): $base ends in "data" and the sample path starts with "data",
 :  so the result may contain "data\data" - confirm against the directory layout.
 :)
declare variable $PDF as xs:string := file:resolve-path($samples?climate, $base);
(: Disabled alternative input - women pdfs:
   let $pdfdoc:="data\drop-01d\set\2-6-1\A6229C_1\outputs\9798216172628\2798216172625\pdfs\chunks-docbook.xml"
   =>file:resolve-path(file:base-dir())
   =>doc()
:)
(: Open the selected PDF and return whatever pdfbox:outline yields for it. :)
$PDF
=> pdfbox:open()
=> pdfbox:outline()
(: Disabled alternative result:
   let $doc := pdfbox:open($PDF)
   return pdfbox:extract($doc,"c:\tmp\junk3.pdf",1,pdfbox:page-count($doc))
:)