1
0
pdfbox/xquery/scratch/pdfbox.xq

22 lines
947 B
Plaintext
Raw Normal View History

2024-02-25 10:50:31 +00:00
(: PDFBOX experiments
:)
2024-02-27 09:49:34 +00:00
import module namespace pdfbox="urn:expkg-zone58:pdfbox:3" at "../lib/pdfbox3.xqm";
2024-02-25 10:50:31 +00:00
(:~
 : Sample PDF files, keyed by a short name.
 : Each value is a relative Windows-style path; the entry in use is resolved
 : against $base before being opened.
 : NOTE(review): several entries start with "data\" while $base already ends
 : in "\data" - confirm the resolved path does not repeat that segment.
 :)
declare variable $samples := map {
  "climate":
    "data\drop-01d\set\2-6-1\A5579C_1\271989---Book_File-Web_PDF_9798400627484_486728.pdf",
  "women":
    "data\drop-01d\set\2-6-1\A6229C_1\257334---Book_File-Web_PDF_9798216172628_486742.pdf",
  "genocide":
    "data\drop1-pdf\GR2967-TRD\272791---Book_File-Web_PDF_9798400640216_486366.pdf",
  "world":
    "data\drop-01c\gpg-book\2-6\A3506C-TRD\256186---Book_File-Web_PDF_9798216038955_486148.pdf",
  "dummy":
    "lib\abc-clio-dummy.pdf"
};
(:~ Absolute directory against which the sample paths are resolved. :)
declare variable $base as xs:string :=
  "C:\Users\mrwhe\git\bloomsbury\content-architecture\xquery\ABC-CLIO\data";

(:~
 : Path of the PDF this script works on: the "climate" entry of $samples,
 : resolved against $base with file:resolve-path.
 :)
declare variable $PDF := file:resolve-path($samples?climate, $base);
2024-02-27 09:49:34 +00:00
2024-02-25 10:50:31 +00:00
(: Open the PDF at $PDF, fetch its outline with pdfbox:outline and
   hand the result to pdfbox:outline-XML. :)
let $pdf-doc := pdfbox:open($PDF)
let $outline := pdfbox:outline($pdf-doc)
return pdfbox:outline-XML($outline)
2024-02-26 16:27:01 +00:00
(: return pdfbox:extract($doc,"c:\tmp\junk3.pdf",1,pdfbox:page-count($doc)) :)