From d71c016f0525eae516170a567ce34fed4fb590e5 Mon Sep 17 00:00:00 2001 From: Andy Bunce Date: Sun, 16 Feb 2025 22:31:13 +0000 Subject: [PATCH] [mod] 0.2.4 --- changelog.md | 6 +++ package.json | 4 +- readme.md | 14 +++++-- samples.pdf/page-numbers-password.pdf | Bin 0 -> 29296 bytes samples.pdf/readme.md | 1 + src/Pdfbox3.xqm | 52 +++++++++++++++++--------- tests/test.xqm | 40 +++++++++++++++++--- 7 files changed, 87 insertions(+), 30 deletions(-) create mode 100644 samples.pdf/page-numbers-password.pdf diff --git a/changelog.md b/changelog.md index 31cf537..c974278 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +## 0.2.4 2025-02-16 +* Add `property` +* rewrite `report` to return CSV style data +* replace `open-file` with `open` using `fetch:binary` to allow urls +* Mod `extract` returns xs:base64Binary +* password support ## 0.1.6 2025-02-14 * Add `hasLabels` * FIX #1 error if no labels diff --git a/package.json b/package.json index 6dba658..1a39c2f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdfbox", - "version": "0.2.2", + "version": "0.2.4", "description": "A BaseX interface to Apache Pdfbox version 3", "main": "src/Pdfbox3.xqm", "homepage": "https://github.com/expkg-zone58/pdfbox#readme", @@ -8,7 +8,7 @@ "doc": "docs" }, "scripts": { - "test": "%BASEX10%/bin/basex -t tests", + "test": "%BASEX10%/bin/basex -Wt tests", "docs": "xqdoca" }, "keywords": [ diff --git a/readme.md b/readme.md index 5fde2fa..cacd901 100644 --- a/readme.md +++ b/readme.md @@ -11,17 +11,23 @@ A test suite is available and workflow actions run this on BaseX 10.7 and 11.7. ## Features - -The features focus on extracting information from PDFs rather than creation or editing. - +The features focus on extracting information from PDFs rather than creation or editing of PDFs. +### Supported * read PDF page count. * read any PDF outline and return as map(s) or XML. * read pagelabels. * read page text. * save pdf page range to a new pdf. * save image of rendered pdf page. +* open PDF with password +* support for xs:base64Binary in function inputs and outputs to support database and store usage. -AI (Deepseek) generated [documentation](doc.md) +### Not supported: +* creating completely new PDFs +* Page size information + +## Documentation +* Function [documentation](doc.md) * The Apache Pdfbox 3 [FAQ](https://pdfbox.apache.org/3.0/faq.html) may be useful. # Install diff --git a/samples.pdf/page-numbers-password.pdf b/samples.pdf/page-numbers-password.pdf new file mode 100644 index 0000000000000000000000000000000000000000..654e9a5c39aca18ee84736c99e75f1813d0c355c GIT binary patch literal 29296 zcmdSARa9Nuwk=AK;Fb^wK5=)~;O-tYxVr=i?i$=ZxVvkDySoKg7Kib_V;+T7MkTh|)!um5UdZY67>sjd63XIW|h-4lwF6N*aU zk?8rooE$ZP{;&6cKJi+aTkx3M15~IfsR4ifG1AaJ{xY#V{;EMe7H0KW0|3Kw1!Qfk zY_)Bk-&q4_9&2H4qiLfHpoXFn*VQr9{J#@D7Nu*Z^VG6GjrqHY0){3wx>f)x0h31_ zK3#2doyWF`>6+==7y#%PnV}wW);3nUnx;_Qt1&*MjlXs+XGY|DTBwAC#J07eJF^r{ z^5b`w8tKPA`$abdw$O-F;F~2arr!)M5wKRDdwf}t)7}F@X*_{JM%<16M(yR!?#K{* z2=3kwoL;+Cp;QYkVDXKVketr@Vp?Ro#j=5%ryIBi%Sf^&&|?*Hf1Wxy22pd_-Tw$Dv8Q_(;PK$_rb>G93x4Aw zsv>}reUNL81RyrJ!IjY??ZzldLEG>4Hzc$N{{YmY^uR^1no!rDZ8 z%a`X{m$}NZvKK|Mb!K(-_IO%_(+s0bT+$6-iECtG$o`4fbJqj@4jtn^L-)5jnV!Y* zzf7I9|1)*|Y0T@?NlX0?b*`V=d6#Q;(JE@4&J>@^JluH0L4aJ@h7eKb>&ikK}@8Dbb|Tvg(dA zXEy2!Y=UDzj=CDNpul)l!flwv2Gsjb;(F2dO1zqF&@=;*UxlJ1ezcdjQru-5ZEfFYD9ik@lOjG{9#U`Txgt`i?!Ta9xEgWWt+ zi}pfX9dWkc7fv@R-v!vS2Iz`R(>b=|l=A&MwK4xQ6o0FY<$tBx=>BJF`_q`$sf~{D zA8IqS7h914&oDN%d7~CwfEdJ0Vg<1oOcfsxZ_+#5RVylehapizCKKsfe_*{lgA>1aS14= zoeS#~C}HHPG@8VS9NHK#K{)D})fh_WwL(JzT>(o&xMF(+cz!6LGBEXKCXe-Eqr@6<>0xE%ghY-DY{}CmAMzOP+n)Spvn4S9ViC(=*zd7olJ?{w~CK|O;!&SUk? zWk1GTIH&2>#Ekx|=52Nj)*f){(=fNlbFT@1CL(lv$}n;9mck`Qqz1OT?1^Q9V75kq z9a5@*8I2k`T9mH|Y%RNuGMX&>ki8FK(U+^C%%dU%1zqzX2fXbZM_Ot!^8`R&IRaw2 z;DzM&ZJ>`c({;BKF&5`;3H1!5y}+gGH1o=x&+@Y<#+d6^6CDZ{OBvKhxzAyu1;2{W zWM{x%68dl><@F*NFf>$rQY%E7vDClb-kQ$iPn5JujjdZ9%%bVf7EW97KLT9Cd$G!D zBu-J42P|O-DB-G7J}?Gj?b<4QZ}_GOL{m8&AjV1mZtcjwYM7f+MyB@6rfX5TkmBm? z7Yl{FTi-Jk?nH}{D{yrDyxOoy$C}TkUz3NZB$@f)S6h$Q?^rPO;_}k*BLXUJ%J5;{ zUj-Z-MTUq$WL+{J^BP%wSk4VM92FkhjPx0iLUs}-l}rpajZw^@Tz&|2EV}DuiQQOG zPUHRlO{haHW=rM-`&l=^5~?G{P&bLe4;dvaCj?5 zm4>`5ZPpq}&5|pwv}NSaldt(KwnvIeeCwo?m3f6*2!35ZPtZrAX^M?#>-I0q_Buai zh{J55l4jiJ=tu@9slpX&xt#ix(fVO0S0IP<{Rm@69yC4P@lOnupOUuy7RjxrTY;^Q z6m2C;EqBtJD&Z>;V6giVk864-bk{P05Qh;UcCqK;9F@6Q|V`GH`Wzh2S;B+|7 zeB>&|bk$NcM*0R9j!ZSW6C1>4R1T3v`?s>VaQ)Du#Znf_v!)x(YguhTQ zFmSS}=8E$DydTL>ZAjz6ln&(1zUaQk+7m*<^;YWsw%UMb z%XJy@jUcOS59F8psJ=;OMu?eMW?~=FoULIJvD#7oWSqfr7tB1+*WW{LLXFA$E%Osc zuRAM0TUtThdkCDQ58z&Yyx=c$-lvUa$rL|c`Pm_X3Pe&XFGNIAzBVS^Z6!g`-udx! z^}-Ez*4)+bQqxV8={&qBT(d;l;2InG@9lcDF1BBx*;Ht#vgPu2U4i5DlJ!_rs6j-v zY=bZ(Z8@6;vEd;PC|V$Ud>FruKZjap)cq=|yz1YCnsm539!;qvlLEt;_N(p1ATUL$ zU5DQ+4XyJ%vri3pUs^43Cz}T&jfEne4FC1l@!d}!&Ma|L^D16Y!*^|I5Fc3-Rz3`Y zz0-8j+5+z#0x%Pksm4(FfU#u{YdVLdA9lyXNg^;7EdaH|NTVy$d+*_CjSrWAo}~uT z>Il{65n4ms=3^Pm0IG+;(SfgZ?Q=g#kA8&;yCi)0mNQtju^OiQBZo4(UC+*h?V}MC zu1*mA2!H%rMn>V?3Qri`2rgz;_u*OSsyS%SNCl#ivSuw(B+MJ;1k5wp5RZClnk&5P zl@uZG567UbfnNI33`kJxeI)Z2O`8io+CBq)a6SYTWC2r*XvURhxd(OP#~_LaKNYk; z_sO9jpQX^~pPq{%9?GvYL-65~ z2Y4bIa)(?*n!2qWaU)gF9239TDW!}d@S(MctzTh`0s0Jf$m`#df8? z9Pn9y9bEE)gZ!XOLp9|-Dcs&hfGj!Mkl4N*$h`kWQiT0OHuIJ=3+bbu@A*O=GyaWr zaDN5(MF812{x7ahBnFFLsIt-ihYKEKy1G9aCvPi!iqQ5hiIus+U9uN94WK}^m8lNq zXPiGW=BIX`l?&Xy34-{%;p>qob+q61^Ieu7D2`uJtsJot*@Ft_NB(0dU+NN(z&?V(}PfNc*DTzATaecGOJE5?XF;-;Xs~@ z>?oGVtc~oQMjK89?1Kwgk*}UhGZsRI5hV&@VC*?|90YKuIK$Xi^U%YhG4r7x3(bc< z;E!?lYkZY)h@Eo70A^-Jevlz0-uJB0ay|G-)CB|g{x&7=2jrrmud_X(hxnbgcNcVR z=FD59pL+&$A8L9AJnO~b+p=(XX8OxjNo556?0+^R?c^wt&Rz$$`lL?>+3w@ktF?n;#5~&{!MF z!BtH8bQ6#hhTi1p`YmE@eeXe>tRgX2+dk`dTX3sozp*dn4Pb$0MR%(m9DyC ze#Ev8F`wWI^mtk4R>)8eM-)Nm4L_yJ89SkY%{B{DcN`t&7JZN4VD>EvI=M5?v*%xK zWPb02qpWl?fX0h(DCJH1t-M5_WWU-7Lq zstJ)g5Ph06B<#p9JWR>=v^;mFD>FsKp>c9T-cD1)LL6pvOsj`qjhnuA&VB|5nZ$Fo zZZN%WEqtTcZXc%tzz?`gjx1+Mo`kVQi|Zj3Z(AMTZ<;v4C<@p?`tT4a7r;m^qkxP- zf<`0)PYeh=;9{_IqeSWd0Skp|#8E}TI?r3=D+zg&8Pdv}OWz!;G0p|twm%F8<&){# zeDuRkksCwmMl(V)D`C6E&sQcCWtWd|=6gMfnqiF646soZsubz`@UNDKhu=c)!Q}Q+ zL&=K4IS9FJ(L2i1&NVixt&vVrD*~76!hGB62bAfh*y|F9?4eH0$v9z!a&CC@k=mB2 zljEpSHR4MR1+QJR4&QD01g_4=N84-{XLxqMyk7_Nt+)S zb8372#w z_Fc|~eoIN$U(7QmX)B6^qt{ZsHxZR&@-CL)@8W7eq=?`Ho6H1XCur}_Vh%c6q}Uha zvXZhOtxaQCb9r+)O*TT66NN(k!K+X`>OuO~f@M(&RM}*-wldoqX7G;0c0S%C{$N>Z%3mmDcs)mg3}XE)4xY%= z(%Vk4=iilKK|+vZL`buTG9nVK!z)1DK8N>S@xt zcy^y5WP5+kb$=0s0s<`jaE4?}aEt1jFeBEb+Ivd~J;;Krcc~0IK)30+&0J~X#5%-b zQd6w{gu{6HUZzD^cCzXMv2~$rD%L%ewf-v=eH_;+AMBjF0>p68l! ziMJ`Vi=$MGcpEYgy{7FFQKj2S+|@1brqle3oZli=&zg2$$hAibOVMaC(GVHm+D>Y$ z7GI?nsws4~QIzA)qQVwTCuwiBAr#;BR+gxB}WeYjGV zbeH1?blH8^Qz}2Cavq}vM=n?`O%tGFB*94ix2NzIhPaHmiLSV&1%T#-A^z-2%US8_ z%9xwmKv9VTXr8U_f4SFxz5r;RUGIOqlF+rc5!H17(EmBI|7A`;A^&z{qyRL3%73)p zWdT&Oy4p|VDl|`SIDqDl8!7{!d9ptNG=JQx$EPQI3PAhEJdgpGYiu*3)3?T(=!Xx6N}o5 znErY7cT6R0O|2hqI{-}2<0S(M_F47U8|Rj{AN0jr9J{A9)F>z_zX4mtu#<ANL0U}&ssZu@r?1Rh7j zKM}kJnpTe=|EiF%*`ESs4K)8;D*fx_KSsh+|NLq3|6_Vr`+F=u1@@^0|8EU}f3)C_ zBA;6D|8G?MgYl2FpBVodD1Xg-PtSk8Jl!Wg&U{bf>1pPZdb|hJ2RsdmKeOx8y~kg$ zr;7b2*wg6x@323wUj?K2=TLr{4qgSLeF6Kc)&IA){as1A7qCCm&8td#v4%WNPp^V8 z{Ij&D>Fiaor>lU!0`N5by$Z(kqO?EZeHHBK685jso<`Dtf<4X2|1I24i_fcIFT(w_ zSiK7NBHT}l+pAzN!u_-uz6$mt+)s<=t6(p}{j}J=3icx0PmA-bU@yY`w3xpN_A=c6 zUnAh}aI^eVTu+;YSCvNdVlsK!UAzwWLMKn#k=Nl~2<2)2@;clLr95qTUWa=jm8Tuj z>u@i$^0cLT9qxr#p7vm`!@W?;)28ipxEFGH+U30t_d+jE+rn4jo-f`1nvI_Jk*~wO z(96@t^L4lvdU@KZz7F?7FHc+B*Wq61<>{pHpKwf18{+>q-98;nUWa?pFHdKg*Wq6D z%hTcKb+{M(@^o5y9qvWHJRPrIhkG%Op3Y;h!@U?sPY1Zy;a-fRrxV}na4*Ku(-HA? zxEJH-=}h@L+{=mg=@j}(;0wh(om*cCd?A^qlkO{lFEsOXHhv}Wg=n5m)2{@+P|ef% z`<1{KvUxhOzY_RDH&18$R{~!M=gAd#CGdrEo_vJY0bfqRPtL7-hrAN_LOINT{D!}Qe;EkO&-McyfceRQcwrYjeft~nXdYSpb9?jz_~%!? ze}4VrOZ@#&)KKTqt$3d10nY_K+ZF$^DE^TXYW*i+)b=r1^JU#%?(-jWN5wQ9o=gHs zGhJSD6I;_q_u{YQ6~I%{%OAVzDbe#UOYLtL>^a4c_c6`I#9aRorfFp?Y^G=aoLc(l zNmmE(h}8YloTofhF->Unao>Q(iwS~uKA!}Q9GEcl{=lO_W)Fj)tcam%CN zV~7X2;7_Awqb6`}nogAo1{L{i-`*2L-y_GAj2w}N^TWi|Sxu8{)oi5c520i#(8&h0 z?1)8`bL#l>C+xr3s(7)_mJcP!xF%#Hjj!7n%)+SGCIf=M78=TdHy?6RfSO*LxJ+vL zI|pvhfW?U~$@F%XsQN1vT;7s*lIwH0GsSlR-2q=z!$WrBH8(;CNLNbXN)Ns-|mEbSt&Z)S&+b~DYvZ2-%vLuW4)qsD)z5OQVk83^uL%{fixP5&b;3k#ID=P^dj@;)P zpXruR<_c{j7PQ>CC747xOu=F?T4z63Ga@&di(E(I1=O?BrFOx}VHqO;C9ccCgZBx| zdnPZ2&u|-SQsl_YsFFk*BI zenb=bv_`&&xpSW(*bpvhAEG929fo*Hw2vn7_NXBehK6eQ;~%j>D%xEo7ACGX_)uZ* zoqV)+e)32$#PZ%7lb`@?>-2%cQB)gtK_+_b{EBqm^f9=6_&$Zf2^3N4kx-+t|HulKxZdXz3q^uEb;7`s38|oY@R@{#&V^k9EpwcRH9qP|%Fs z9tl3{P*QQS(=x;tOvMfZugiWiNV!7n9$5cjIeNii%*VT{jUk4?>msRP~ z1A(nsewC=t5F8g7=@-F#xA<>6@Gq0`$sBvmy{7@tQ8P0?cGrLEPZ~NpmZz2ZU!9rg z9N!EZ9RoHfDH1;FCjc)J=^FiQ&A@izT|U5$co%At!XBmuyYlGw zaAjfM*_ayy7l#`!s>zkYG=ClM;P5SLyqS5jIXVl?J>3zt z1PA+(wjQHDX;;V#G}CX4#4p>&O1)(A4w+A%$TM8F!2`|aU#h%Ax{VXw|9y=MhT&NuzXKgz!Y zi)?FSF+J`a;O&tZKqtF1_kkC((iTw@9rh%$yVb1?)3lo$v}H4q%#K>zG!>UtK_Nn( zqA`ZRZTu9p&|8;GOa$?00$5P}uD$WdPD=@(t1vD?QlxG5fupbY4Kxdf>aqdYVje0H z;YS2GE0j2j8+}i2GsjE~>;yE_GX}0LaQu==(c0LWSd3pI%2+!wsR{?=F-$_-svCCP z6e8YAa_3B$A{J=~8Ko-J?D3}56HaGaST5gvqo!=~uB^(Mm@o>9M3$q&IeqEK9oVEA zY*@yYw?#bOMI32TqJP*i=9V-~j{ey-kz*gj%iwSq_3lj6mR$%_#3L#+4ao!G$ys#j zkD2MJKGzoacp)l1?_E^A$&4Lp)hOSp@~or`LETs;{DK;-7-Lx)FZ3?P zQ4v^P9*%_D?}zmy^y+Jex$l+e17 z^hINRRx-@yr5j+Z*=}w_3$t!_*5D>}GS(KMwZw*1?2sD{;tjuwlRc_I)?(~Sk60t9 zNrqEo+Pd-N!#%3Fep!R*1ZNcf@f_ayBl7$A_J)fd?h#Z@l)RVUP zMBEp>WlVW{%DJc9K5gD)CD{Giy#3IWsq3lL<81`_ke)M^x;mY1CEU2QmuH_9x(sxp zxzR6HoYfTHWc%Jcnky<9pvK7wT8h}k!kN67_KDrSa_^m8f`JeR#@=A_#(Dp?YVTH( zIhq;UDt^4+uMD;d0#?W|>OpX6`_I#Cb)-;Us5g=W9PE5Wf3 zS9Jy=F7YQvQ^jr%HkGgY21X1(G&g#YOrl#=4Q%z-fz)*@*Z@`Q)5)(?I{*Myw1fMa zA}#Kl^gWS$?#=HRB}i`(WSqVllg#rgoWz;8sr6pLi9+zGyJX!!ruW;M9DgzY)T%fq zo0lv~)>Dj#K>Rl6Cyl#pUvU%z^ObqCQB}HrioGlKhL&$PxuG_)YK3Wx+8drtA@l&_ z>46?fAf8c`+sY5%q44sHzhX+;DrvY&Zx6z%?X{CozuBe^>5+~k^&)o;VAQnX0=r1I z2O@IActdbs&>Az%Z7EdYc$2NUjDhP5S z03>Je(?Q-~FirGiO-u1c+l=Hb`y_KJ;LMB(8gvR19VN=~V|DH{c1Oy7Fd@eoIalto(_-&n1FG9}(7 z0TCXSCvw#_RbS~TU{~$s%3v60-VSH2JZC4Klx|Ev@B;eMZec(?y>Ij*UK5pAfvflT z?nOQx?G2P%P81E(4pXclnX1{kS(yt;^aGL^LryF+Ihr>Dr*U2V;lNjVt&0OgdzK@G zeR83yTuN8f4(iR$dAA%|ok7^dC0yM+!6JqhMJpnlYg|n|_e{TwAp96v%$} zW`tqThGTcVw`M!h8#G0(#3i70eXyTBJcpdbW&N`BkMA9)HK=bzK&)~L)dnF zHash2{HIOE`U!FFkTj18{!0eZwgZVh+lA(daVx(5igWaB+ zCWnHiJLtX2bk{Ny?i?T51AaS*G4@zxE;vI;e8-BGCC%L>gJvL^8n06gWQ9GNoX0$= zFB2AN0AfecM7_P<8Fly(fG8#e1H?2WbiPgxahdCF2)9cI6lobLX?%#3ZG8aP8F!-M z;+@Hl-dOc!8&Ud=>AiVH;I_?_ex{H5eX=eO4gj5m8HV2&xK@p%PFzr)KkaFi8#s_xyR3d7;IxF91C_W+l(xWFg;MQoS!cr* zCw^cE?vLv>%5))E{gi|yI?s`{p#i8lA5W`@5o-XN0yz_$u~pFgY%tHgy1aPHIH8=2 zb%zfh)L9g5n~_%~Sd))o4_Uu~8##oPL?a6x@DWuDYewR_rhr)&surCq@q0f(2GCAU zuR?6P?XR{Ng<=x%Qd)c#FIgeg{=i^*P}M$ba4KD*63Cv2I~BDRl6V^UF&Z2zRz!R8 zI$U^iiXiqCw&uIk4!|G+ldj1~TqFMub(aWo=B24ccV0S?wI#nUOTlZV2ByAtc_PO zjrfk5L>Qo^(9YzzxuBFQg`5@wpq2$6jNb^;#HKy_xPqjYWeVMT3ar`b`7*-ilHuPhaN7n+NR9`OM z4kA7nWQaU)sk#}3+1FH!=&s;aRp}0lmd?X1&*g8LViqe3y?2%J&Gl0&5ve;Q{_pQ8 z=p@C6@r)?})V~YnA=w>vbP4tLoRGD^>n#A`--$jinu3K(pRK@*OPFcD4{g1|ld0vr z5LY1WI547(Y$wjC>yJMg@s^AL*vpd5go&UZ^_gG(*fMCAVx?SCjx&P~DJ7jBI`r)O z8T+A{q{?p*7#$rWw>UrNdh4i_T|b|%T*E^Mv#BQrxmNl%ta_?XbOvg?QuJFRB}5%W zBnAPlv2M?y(#~3KmNs=d8Kegam~I!Ptz5K{wc%h~ZajzTC*NRbP||fL#t27Z@wdp1 zI96qs;UpNEBn3xilE7lp4(G`mTM*p&8N@AXrD501GAY>g!98jKPEITbQ##V|cWY4C z)!g<4jsbszcOI=~ z6TQSy4biyMZt+(5gx^@-Kk#r<`^JvQ(sXfVhhpX<&qn6RAfdDeFr&R-vY9#I7q*0P zt1`{1p}cN#Ghm?nP~PE~aD}vs<3WS!UMwP04l0A6w-E2%&1fV>F{(vpi@tLKopp+jlGHri(Dxm66$+G=1q(J~I z0YM^*_le`sMCJw=eDt&Wj>?2NjOywvcBzz}H zogW88kmCzD6k)Qpkb4!{vc#0+5nujX7A}g%3vx<<5Tq82u6tIC>(g4)o>Sq! zXWqX#iR*)E?4OQ-&_h6w(BqPawrBa04KtPNGuF+!yzAFEQb3aThJ zJjL!Bx3`4?OWUp3rQC(B-LXfG7G5)uNtyE#=uiLR8i77;c5V0cvDK{0CBe`8pG6+1 zFXU~sNTYuy0A$lwab%R@Hb*u69K9ayAg-*hFL9OwQ9zC`e*Z3|Xn0HdP>$naf6JRY zgRjG7ejv-=qJF?E@!4To$J7=htV1w(acx~Pvdos<189n0Qd);xcQGb|F1nl(s|5;% z%BX7(GxLV_che6Ooa|I2`O*5-^=MaT;h=zF@yUxatV0n)-|=42rZRzH&6$o821=9Y z@cne=xT@8FnMD1`gF0ESw2VEo^h+1|;_3qtaR(Fc;M>RrYD9vW6`vWRLkKZ2B zcUD!-9Q`!+=25%9XWdy7j79U;jMA#yWuT0)Qis&k{4isQO|D$N{;q64;wao-gPmYA z=SRd37rXF;UW`fZISUI4>ROwM;1KlLHXSxcTXp@iYT<|tQLBl)(kbF7k|z(B;DVfn zV2F7XMj=i=Q||Mvu$S& zg7F!M@lkkMF44j;{#(2ml{;-p5}RS91=*O?48p@R(#EL35Ag1t%Gcwdn!kN7NYZWt zZoCB5y}v2zX_t`@=a^q~1`rlV3~%HlB)aV(ah|Z8jwf*W?3NJY4F|5%nm?v(-^B4; zYs~}(^)*k`QyIbUb617VKxj$Nu|rPGy|X$zfVfGrFJ~ik4Z=t}>a#72u))9+s23Y4V)q8;Kp+&X5VEaC%fOZDrj=1_v2)>953M65I-U`eP=ZkrV!D*| zSySWQy_c;8Y8UH~>381PP5K3{!i$ZK z?TvkK;sjly3mGQk!;fv%R{Hp-pgVA;=;GD(7pizac_kLp(nu|WG?uvFP&7{TI!AkX zuk9CVe`eP0Fupha2!-+e*6%i%NNnyquE@rzr6*p9DTcfnFaBB8J}&eobcFk0pQ%8y z#!*^07XIn)1+b)um_6IyepK5#i7dSVvY7*nm6~0F1?1sQ%MTVKEO6XY+uo71>MY>V zWpn{@KN?9K>U~<_W)BVI8=4u4&C(3UR*9)BCxs7xs6~DEMYm0@cZsaNfE6RGYf=JF64Yu}qQfjLOBLCR zRG@P`y7(@yy-P5z;kPaH>cv0opi9L@b3Lj-Z;@oJf60+;MZ2HU&5Tual+OT{( zSeRPkK}ZAL4xw5kClIT;C(rLNZU>ZSr*Ajb;C*c6sV_zkdKJ;@j8l1l#cTpYNPm2C znK(Dkbkr)nI!+&_pTXvo5Job)VMnwh)~3;Q=k)?qocg2o>>D1q$TvxMYU#z$cPlCy zeXh@?b#jTWVNP_fhO>1u7q9<>wn7f|z2f9lSnDhLFqFZa)-R_ngMGzr7r7k#GY7YH zOQE8pQ36S_?2nMNbOK*Y+!fzH0AL*^;0)O3R@`v~-!&b)qY+rEnx!3^aK@nUbaZKW z07;Iv2y0iNaO@Z==V&xi+?}9_+pIYzgyV^#Q5He8ybw^{*hJk71+KSN{)OdDt+?^- z!grzJ+Ayn^1QSpqJsmJC1^I@H%)i!)UDx*qOBvAemkjKWLRo_?bm(d!2OL)VOjxvYUQi;z>>tN`@Q3B8!y{CiAFxq{4SvfG&7;P+@|!xYb%Bzz)8Sa?5- z`I;J&!#H7B;F~F>f8ITK?*NS0V-m0#0Jger3q*=iIFd2ouNcP7kLfSJFL^X>M#sx$ zU5{!?BU4(m^1ThwE{BeEK_-a%oCc| zAoFcIIIWy!E9ABtzm`~T1oA=RTHomh`ql(Np}WZ0N(C2(5ZHjzxW&@De($5qAnOj5 zM=-LKp+tFoEPrKS8Wb8f~NEGg9^n-G9k$gk(wlPk;`0dHw$WlIpdZ*9_q8?Lo z$sB@}WC5O%G4G=zygd141hF!1s?GdDKwfM;YfrJI=kmhICm~helx=oAM(Ewd5Sk?V z-qhNldgUt2_rmRimDO8lgh{8IL$!UT^;8bKg1!gbfVchvL)8lI0(8#!c#5{72A#oM z+nFj1!4BM6`g}Zu&cDunqO2S^*!=?Y2d}l3^U?j~6Gb!y8>J3F-l{yx^mjhhqkmD#~gj~C$)IjeBVxs$L@f3@pPXP*hxQW8x$fjl{7VFkW;N27_^ z9a2@<3>Hu@de@S`D6<_xP&XEovg^0lP(9Xa1{9#8hMVM-18 z3Hpc?q_{~chG?lzq>n}Fy%~;O}5K06Roh-)NJ@=<8w^ zK@0ws3R}uf68f-(@tJQe?pA%SjMm=QobZPxUv;Ra9QdH0I(*S)Tln1ep7#ObX zS+2V2bgEAPN9y4zQ!aXwftp-t4}xMKg#L*qJ-P`fDLN6`1%66dTlysH$M^elB~*Pj zLp|n)W8zE}E;QnqnqLIx0$-(iSV7)mQDA_Rl$~;e&J#82u`@}zN3%_xO*6q7p8#23 z&P7GN=RVecc$+E3<*j-JxAU-Crq;x2Qc*1W{xEJq0_#_;G5LZM)@@kcV&f5;6)ZHY z_Mk@j0H~2@I{vPXA5rmOwrE+?fLrc&@im8BEv1AYjqC{jfdI!3&iCv*IlKsqir7gP z@;6|(mOoC<(878YxfC~*98@0g{Lq_u?)et3YtVH1{LMMn#z|=ghD^x@VK#En?B^FU zFSo3nwIVqz&B;)(Wu}NhGXzmizv_3!4wRmrS1=;9U7qnJ%VzKKleSsX-^39gVs4VW)ZMFolR zyun>PF5U+<9^mSrkZ@8j>0&_sTwC6=KD9IjmFfVVwT5dAg@!u$!^_E%9(At%o?{tY z38i}p4+-eT9PQ9_;j)@SfMIBkO)Wg4@Y6n06l(f$w%Kt%1Rc^M2T)~;zIGcWb0^U< zu$A$Q%gpvCV~=tpV%K4?E> zXq#aS3V~OuinJ#C2}2qn4#}s1i@Q=FG7xcPDc(=WyWuS$ld&dF72zgxTLSiC`R+>( z$>-Hht;p{EJ83_Wo~|LD&8paF&RgrJlQa#O!6=?mwE(wvyz8OwfXRF70F^ByH1_$B z-7l}iX;x>+Csm;%diuvTV+ln-aW`QI#Au#1zSw2?K zkY1NdJj3UAvJoQ?AADEvWPg0)7EQVTKCj>+;0=~|Z*?BRYZJg$VCdj!K>|Y7z~IIX z(B&+lu+Og3-J2mqcQt@apR5K=R}zxFmYR643gbJ16C`z0Mjc1a&~VT5O9_Gk7B66; zD7w?lNd0|-PsY#p)Ak4|5mC7O^j}oYB$>$BM5q)otqjqB8*RDx@-<=7nSH5qLBs`v z9Rf)P4kI8?xpOWQwgw(+G?5xe6!TfukixV4S{|MO{opQUXu^rt8Y5kc>5AGJ|%;-QTTZtp&fvNCOY}S4b98uId z`5)t>@_uzfAIT_$SehV8pZbEw_1$45S)>^C0C&?5EDldBwz}kVxW+vhMT;bGc*Q_s zw2iRDr1L2ugsBO?9AlS0X`so2cew=hPCgD*m$XFZCoFRitbhO&9&=Jna%ZdE#-Zuei9ZzW5`e5euw;qpBl%MKR!2o!8f_V< zb5Gh>(YBHVq7CBwp-f|k(Nd^&Sy>!Lxgt|!BdmjRqD1x(CxtgkQuk|7ju@fvn>?L| zx*_3%%iME}O$C()u$aj)ENS!fjn4idtkb|Ld4u$u2D!(bu;9-R*d)FAV&bg0*9oNq zAg#gN6qK3+x3R3hPk%s47Z|sEY~61qQVMQ2>~XtE%)YfdW<`s4Rvd`LK#13*mRI3K z=ORi}H%tZE4Wq`KaPAFnYp0>mq-Y|xS$JDMb`2Q^J+|IyT1`laCjoz@(q6DiEB*aV zO9$v#9Ak5Y^|u4C8vXpNyfmb*KEtYLgXp z=sqIC-C|%Qu;rNmH-6z}TTC?lWYvU8>s;0IDNu_|p%>MyN42sVJ9ffr6SkhstlulQ zjCR3P{v3}ph}KO2ODLiNb^GN0>O@_K-^ikkubM;R%@2@F`@<8pq#Rs%TAau-Z#S{` zIK!hiWpV7c3O`7Iyd+s!_!E!n){{cS-m0n+aP_OG1jcp8h8))< ziQlfQi)^Nn3hwM}@V~JI>olUgoRw(rr1)s8RK%QnDA27-c!ID!w}q9|&?0csiOvIm zpvKTRPq_lRM#5a^GsPTL!riK~Qf+kGDy`Y#%#%wL8_99t6FZ1TSv}*vh}uGoqx(}0 z{C+_ICswf&BRrc}YoyU_#dQSuOvk7*2F!I?qLQUsEqYqi>FbH=6Fj*)F-q$+znm{BWbu6^-IDi_uzyoQM)dZjOkVnH8^H1Pb5w$a&J?oKpqb~ zbG3`pMk=m!H~$wHI;*@qI~L=gRE~OG!ED1^b3S(us`}70m&c(b;gF0`SVy8Ad|@&9 z*-T3rWcS>{lOfKiTrn*l<03AjD|wr%Blf)P^D!E(S4@`;K$9{;z8zml?s0QU9%=95 z`Jtt=aG+>1Gic(dC{HL47G^TOuSft)C?!+hBFMGgTz_^9ymY26wc-;^Rn=;DFRNR= zB{L?TrNP%DQXn9RJU4_e)#a{*UZemO)PbTo4ma5GKN(C`2?fVZ<%1swTp9IM`8hjU zMf4VL2^$-KZn1YX4q3i!s}~XCFL-~gl=DO4z{AfKQiI^IO>Ulv6OS3!!(LB!9TJI@ zx=+#0vFH#{5ox&01@he3(9=PFdV*6?(hnz(RCv`IU6_l6{fkp{mV~<-JI)1$XdjfJ z^~QC_hf=l|qY4JjgL1W&&m{FjDG(V}hwCNZTJ{e$mAD^2 znUd|_E}H2qQ8kv^mWtJaRN(-khqJcUsy*C?;C%Cop^$X=L3^0Zx%-&xS*)hVM&GW< zj;-y=udlJK4yZYUIs6JlL}EAyL1)5CNKj)W9~&nMyPc0tH8rHT6)#hpi3*RxNAr;$ zD1ly>G#o~)i=HXM4P4QCOl^@Sc`u7>LDP2^I9Vww!|iLcS|v*EqsBS^zAbj7lHXg7 zg?Mz%PCC6-d<#dw0ZoZ5RI?5}q6q{Ary&V7v0QB7g6(^WbkqhxMX;th1qalWH(rDy z0ukaGok0>+6D)xA2k;1uqi?u&JZ`D1{D=1~^4p7`#R}=vfmJ=n%YtXkj5h)AjaF!l zi7D0QQ+vBRgn1wD@QG@MvW}OjMxss?$=4FpuJJWp&61>9Kjhzgmd%s_c$i2oP#AMh zt_SI&p}IYd-bN;x{}$a&-i`-7e{f>ce8z>fz%j390br8G$$)AAhvzD!2 zm=C?#roE(4)`y#h%`V=G$rNWLhmAV7SXTMi%=`s?_BVI>xKZhRqDa%%5co>17h-Wz~}MPStIXM^9tXIb@q}u zVZY}-lhCdr#7}xhwHSuAKx-TcMf8!hgzuNm@7`dul{)6S>v~_)pCOmuIo=Tc?w@4J+I_{3<073xVOYnP zTuR5T&46s7wlm}65U1|3#mEqC?Ouw8n~&eM&ZK^Omis!T!u{fx;O$!1VVYtHD^rc5 zLA2X68)NFGKDCWU!mV5lnqr#xLx$%D@!CP&MxjWvY_;&f+nCjzl1ImGttee}-OSM6 z;T;v;tw?j(xk&ey-Ju`|dovx+aBrD!apH`_NlV8*C;2n$Or7a!@!R$9R+05lZuHGm z@uRXzMS3TXb*IQ9F}Mzu(Px^W@!qstG13#944I5(S#l0Vaxu8%nzG-Q6~@vF3rn|j03k=>F0yjoseO_;B>X0}jJp>>rl zE$qx+nXE?>TbZKp2`aJ=uAE18F)K8e5 zx}}vpsxuieTr^C9)eS!J0Lmgq`Q42}x?(!XHSFG}#LJ1ZiXnX-{STa#H{k0pnHP@A zlwhNrrxiz@FbggA1`9b0sc2&AXAO1p`w3TX`0$udhG8gr^W`^}gU0$aj!j&cW)Qn@ zJ}P!FZLyu1Z^Jdaf|<2Uwks-cA@Rt{oCnrt`swgdiBUbb0^`y=ar=83_pkVyPu;!c z?plwZk!2MLr;s(}rr##hs_pu+ZLAym+TkcK@@@0>bo6J+CY8Wuf6>Uuyq^WXt})Op z>;|(n*SEE_SlNs!9I1LTVO1(5Q<+7{krXt|SYERnj`EA0y}07^SGk+?i;0s|lmvv* zPx)HDt1_i4ar4{BQUeq%Lot#a#yO43kJ`!4u$xt}1ikD1Yxb4N>ByeZ<|5Yq=#=sA zAJx;?BD%=(EeBAQv|RhbIRF&Zv7heEkqyV|JCRb#i*qV&eHt_P-J1JtTm+eu%hqAj}aZoy~)*_<$jAh1`AEkE^()N`AA$t#Tgv^m=Pkn z_iOH|*G2bdekqoPwHsq9_hxur+#_tSuyKzh(61JFeIk@oxbA(D52?b!AI^kKR{X4Y zvwXKC?zemGI6T|8=TL!4RJ}bsm51@dn$`X4%G=D&&a`6z4_HxpGikv(GUJ7!8(H}- z>|3`T8^eYD?vG#ozJM@278pfdcws{7mz79e2DemUTnAOxSO#5uK317X*gJcBNDy1K_Di0q=Yn-d=d#AKzIyA6Km z5-Bu&;uYqlN${uft#|*Pk6JroyzzQ+K$_*m^@l;rG7ocxH*FL`|GM)~7I7$vQ#`m! zwp7O0L}=c*&XQ3rCC9+&?(B^INhR!z?^C(Y+{HKc-oLY>pYS}ZyLirj=Vrjjl0B5= zxNh%~$#iaXe7fYUe7CWp)2Pwat-Z?M7W=Lj<7 z>SR1>V!=y&W3z%6a0}(1UW|%e9##fBeLb34c5Nta?VozoGOYE=Ky8 z>HOqdic+pmowmCF`vhIgTzPtmENpncYF&L48q*9H3>^gW=&Hot!@db zOxea~(L{$}BL-%dc97`;Id3{s#p&p%(&57cwR9eAcI3!u{+mwx5p>4Wrj(z5PF)molTKWi{Hp@+}`#Gm9fa8Srk8nltr+dH7Wr zC1bGuPI?S?}8&CG|z zdO7~W>r!vn^|L3-ybJT{=O1@Zx@M|si^UEP#lHulE^l}F>^~aZaHFM1 zu#~zd_iMa!3y#0_f;x_xZ-{HkwW~1*X86RUt#G!|rW_0(M zQjmnHj3rCCFS|o8mGT1`ALpPP<9nPfraFwy*YW~s7jkcAows=1mFNt1tKBxb++~Fr z=n+eS$&sZNHk~O8sjokld1|d{jclfbTRUYtM>x!BZAM~UR52l}MaK?L>)tSpi=8KH zGu2EvyE)S{ak-J*mU3Z@LjJ^;%H<@MZ_J9Qw)q&rPLaF8QU$pRiH=b_DsK%2=v0&1 z6oM=ncyDBWyv8@QYPkj@O0nX#L|bOzOMPS-5n5X{{S5N?Tj@>gDMRl^ zdxcFB>e+6W5JqvITgt_cO%C#r*ZV@;gmj;dIQj=bL!S(8%>|O1XxIhEZud2vo=ac- z`t?h4{Q%y(?zI$V!>rJ9+(}d6`}?ZNw;t#-n#Hy=nlYx`@7AssjAuAyG{?3Z@bccA z^i`()d6@l!D8+TiIaKbG#rW@OYh>jdvNtjQwX4cOO`4l;Y)6arE9uVZK?3J1Biy6K zs1$XJ`pr(S*GkRys7~e@%GFh?1>Auz6lU}vb9!h}XT!OCCK2N3pOY^Bg}MFR;c#Eh z?l8Hdve74<*^(X@y@vDDU5>)lY~VWkf1G{8m2g8cd>mhvC%aodQVfHE&Y@Q`{kz>f+7v{cZ-j6iVx%)Cy%9*QjTEM{X)}@Wv zrh7|{$qtipNK}7(>c?b}jyKUAK4FV`OS7AEF5{VH%J^-Gg~emgb$*KXc(0{^os0nM zwk__y89`Uud``Y0flJqQd?u$jAKAA?-A#Db+?r=-R~lf4UfS$==W&;tnf9vXt*mdZ z+8=J;V_O-Nb$|SxITTV=+xGQ1TN#poK~j(1vjF3fg8BLeGs@XrSkNTphYe&{5F$!7F~ z?F5thpOTgS$!0YaRgsgE#XAEDr`Y|3*#9Dqum@m20E0`}+}hI01%gHZB_Xt3%PApz_hK#{|t;6Pmzu%e10 z-p<pw+lX*g0o$<7^O#rQIzNExGIurwE2)5n z{z+*Cjet7H4Ras>Ms4wy&Jb{#;5Qn0JCF{hbxB)GMNkfJYi&ZXrWREJa&N6oW$Z0& z_sg!x{I&{A>USp~ao60%)Cz=z5$y1`zeWAu3xI?kfUO*$qA6MvoLxXb*#7YU((ZMD zWL_}oms%8DYj9SiGdg$`kp1Pq^1qizEvjG-6n3$;w}go5TieUnJ6r!F0L>h9?XAJG zFOdBNS)ijD{_mNOAI+!VbAop1l z!Y3Vysag@vcJ5mZrWI$4_9VyQhr2&8sbeSXAL)Lxybbu07DO0mM@b+eSf*C-ogYX&g!xLjae;An`WR(_kURUv7>Lq=VhqZV;I(Lk$?YUo7os=V0M#135DN^yVWw93)~BW zgV}9Upo9_W1n3%Gp8c$Hepx49`)RuoKeIG>TY>St#PLFG=IK)@Fa6-)3>@qLejpG; zNG`q`A22%8&-2R2UU8k|m(lKxd zC`n%&0^~Ivw1-2296n+V80q=I z0*UV=?cg}laS7HYeBtqCj>OQC}3uWF^wj;60L9`@@_uATp4s9R^1_ z1{P$v5w*iXNyosEt}_k_@}Lg(#Q{NwL<17tNi`DRfMO6J$Lydz;4(&!V zC5g{4D4JxgfOt=GJ^+oxcVSR0NHZiF15M%wFc_S~hhT6h$(+Gv#P)!D6o?r*Fa{ir z2Dw~B8Wuzo|D(AO@W3w$!f%A?J{f2KQ3rUa1s-?_9snH}^cs*Sts)OG6h~pqktmpj z1qz0Pn&QnY;CPHF9*su8;Ao^791E8eN8wDMXcP`^j>O_D@MdT{$^?Tp0h|gA!y=)k s2ua|l2nfA~V1eD)=TuvJnuB-geZ&QT``y29!!Zabf|`d%K~s_XKZb*=TL1t6 literal 0 HcmV?d00001 diff --git a/samples.pdf/readme.md b/samples.pdf/readme.md index 70bb392..76c6499 100644 --- a/samples.pdf/readme.md +++ b/samples.pdf/readme.md @@ -4,5 +4,6 @@ * [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf) * [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf) * [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers). +* [page-numbers-password.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers). * [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdf) * [Legal RAG Hallucinations](https://law.stanford.edu/wp-content/uploads/2024/05/Legal_RAG_Hallucinations.pdf) diff --git a/src/Pdfbox3.xqm b/src/Pdfbox3.xqm index 057c852..1d5b03d 100644 --- a/src/Pdfbox3.xqm +++ b/src/Pdfbox3.xqm @@ -21,7 +21,7 @@ declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation"; declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem"; declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer"; -declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile"; +declare namespace RandomAccessReadBuffer="java:org.apache.pdfbox.io.RandomAccessReadBuffer"; declare namespace File ="java:java.io.File"; @@ -33,7 +33,7 @@ e.g pdfbox:with-pdf("path...",pdfbox:page-text(?,5)) declare function pdfbox:with-pdf($src as xs:string, $fn as function(item())as item()*) as item()*{ - let $pdf:=pdfbox:open-file($src) + let $pdf:=pdfbox:open($src) return try{ $fn($pdf),pdfbox:close($pdf) } catch *{ @@ -42,13 +42,22 @@ as item()*{ }; -(:~ open pdf, returns pdf object :) -declare function pdfbox:open-file($pdfpath as xs:string) + +(:~ open pdf using fetch:binary, returns pdf object :) +declare function pdfbox:open($pdfpath as xs:string) +as item(){ +pdfbox:open($pdfpath, map{}) +}; + +(:~ open pdf using with password option, returns pdf object :) +declare function pdfbox:open($pdfpath as xs:string, $opts as map(*)) as item(){ try{ - Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath)) + if($opts?password) + then Loader:loadPDF( RandomAccessReadBuffer:new(fetch:binary($pdfpath)),$opts?password) + else Loader:loadPDF( RandomAccessReadBuffer:new(fetch:binary($pdfpath))) } catch *{ - error(xs:QName("pdfbox:open-file"),"Failed to open: " || $pdfpath) + error(xs:QName("pdfbox:open"),"Failed to open: " || $pdfpath || " " || $err:description) } }; @@ -66,6 +75,15 @@ as xs:string{ PDDocument:save($pdf, File:new($savepath)),$savepath }; +(:~ $pdf as xs:base64Binary :) +declare function pdfbox:binary($pdf as item()) +as xs:base64Binary{ + let $bytes:=Q{java:java.io.ByteArrayOutputStream}new() + let $_:=PDDocument:save($pdf, $bytes) + return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes) + =>convert:integers-to-base64() +}; + (: release references to $pdf:) declare function pdfbox:close($pdf as item()) as empty-sequence(){ @@ -150,7 +168,8 @@ as item()*{ else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined.")) }; -(:~ summary CSV style info for all properties for $pdfpaths :) +(:~ summary CSV style info for all properties for $pdfpaths +:) declare function pdfbox:report($pdfpaths as xs:string*) as map(*){ pdfbox:report($pdfpaths,map:keys($pdfbox:property-map)) @@ -162,7 +181,7 @@ as map(*){ map{"names": array{"path",$properties}, "records": for $path in $pdfpaths - let $pdf:=pdfbox:open-file($path) + let $pdf:=pdfbox:open($path) return fold-left($properties, array{$path}, function($result as array(*),$prop as xs:string){ @@ -203,12 +222,12 @@ as map(*)*{ (:~ return bookmark info for children of $outlineItem as seq of maps :) declare function pdfbox:outline($pdf as item(),$outlineItem as item()?) as map(*)*{ - let $find as map(*):=pdfbox:_outline($pdf ,$outlineItem) + let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") }; -(: BaseX bug 10.7? error if inlined in outline :) -declare %private function pdfbox:_outline($pdf as item(),$outlineItem as item()?) +(:~ BaseX bug 10.7? error if inlined in outline :) +declare %private function pdfbox:outline_($pdf as item(),$outlineItem as item()?) as map(*){ pdfbox:do-until( @@ -274,16 +293,13 @@ as item()? =>PDPageTree:indexOf($page) }; - - -(:~ save new PDF doc from 1 based page range -@return save path :) +(:~ new PDF doc from 1 based page range as xs:base64Binary :) declare function pdfbox:extract($pdf as item(), - $start as xs:integer,$end as xs:integer,$target as xs:string) -as xs:string + $start as xs:integer,$end as xs:integer) +as xs:base64Binary { let $a:=PageExtractor:new($pdf, $start, $end) =>PageExtractor:extract() - return (pdfbox:save($a,$target),pdfbox:close($a)) + return (pdfbox:binary($a),pdfbox:close($a)) }; diff --git a/tests/test.xqm b/tests/test.xqm index 155e7d3..5c041bb 100644 --- a/tests/test.xqm +++ b/tests/test.xqm @@ -6,7 +6,6 @@ import module namespace pdfbox="org.expkg_zone58.Pdfbox3"; declare variable $test:base:=file:base-dir()=>file:parent(); - declare %unit:test function test:pdfbox-version(){ let $v:= pdfbox:version()=>trace("VER: ") @@ -61,10 +60,10 @@ function test:labels(){ }; declare %unit:test -function test:extract-save(){ +function test:extract(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") let $dest:=file:create-temp-file("test",".pdf")=>trace("DEST: ") - let $outline:=pdfbox:extract($pdf,2,12,$dest) + let $bin:=pdfbox:extract($pdf,2,12) return unit:assert(true()) }; @@ -82,6 +81,7 @@ function test:page-image(){ return unit:assert(true()) }; + declare %unit:test function test:with-pdf(){ let $path:=test:resolve("samples.pdf/BaseX100.pdf") @@ -89,11 +89,39 @@ function test:with-pdf(){ return unit:assert(starts-with($txt,"Options")) }; -declare function test:open($file as xs:string) -as item(){ - test:resolve($file)=>pdfbox:open-file() +(:~ get PDF from url :) +declare %unit:test +function test:with-url(){ + let $url:="https://files.basex.org/publications/Gath%20et%20al.%20%5b2009%5d,%20INEX%20Efficiency%20Track%20meets%20XQuery%20Full%20Text%20in%20BaseX.pdf" + + let $count:=pdfbox:with-pdf($url,pdfbox:page-count#1) + return unit:assert-equals($count,6) }; +(:~ password missing :) +declare %unit:test("expected", "pdfbox:open") +function test:password-bad(){ + let $pdf:=test:open("samples.pdf/page-numbers-password.pdf") + return unit:assert(true()) +}; + +(:~password good :) +declare %unit:test +function test:password-good(){ + let $pdf:=test:open("samples.pdf/page-numbers-password.pdf",map{"password":"password"}) + return unit:assert(true()) +}; + +(:---------------------------------------:) +declare function test:open($file as xs:string,$opts as map(*)) +as item(){ + test:resolve($file)=>pdfbox:open($opts) +}; + +declare function test:open($file as xs:string) +as item(){ + test:open($file,map{}) +}; declare function test:resolve($file as xs:string) as item(){ file:resolve-path($file,$test:base)