[snap]
This commit is contained in:
parent
2b131ee71a
commit
bd5b7cf006
14 changed files with 249 additions and 39 deletions
|
|
@ -3,8 +3,8 @@ xquery version '3.1';
|
|||
pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library,
|
||||
requires pdfbox jar on classpath
|
||||
3.02+ required tested with pdfbox-app-3.0.2.jar
|
||||
@see https://repository.apache.org/content/groups/snapshots/org/apache/pdfbox/pdfbox-app/3.0.2-SNAPSHOT/
|
||||
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/
|
||||
@see download https://pdfbox.apache.org/download.cgi
|
||||
@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/
|
||||
|
||||
:)
|
||||
module namespace pdfbox="urn:expkg-zone58:pdfbox3";
|
||||
|
|
@ -31,11 +31,11 @@ declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor"
|
|||
declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree";
|
||||
|
||||
(:~
|
||||
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html
|
||||
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html
|
||||
:)
|
||||
declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline";
|
||||
|
||||
|
||||
declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation";
|
||||
(:~
|
||||
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html
|
||||
:)
|
||||
|
|
@ -80,26 +80,53 @@ as xs:integer{
|
|||
PDDocument:getNumberOfPages($doc)
|
||||
};
|
||||
|
||||
(:~
 Document metadata for $doc as a map.
 Keys: title, creator, producer, subject, keywords, creationdate, author.
 Each value is the result of the matching PDDocumentInformation getter;
 "creationdate" is additionally rendered as a string via pdfbox:gregToISO.
 An entry whose getter yields nothing holds the empty sequence.
 @param $doc PDDocument object, e.g. from pdfbox:open
 @return map of metadata values
:)
declare function pdfbox:information($doc as item())
as map(*){
  let $info := PDDocument:getDocumentInformation($doc)
  (: the creation date may be absent; pdfbox:gregToISO requires exactly one
     item, so only convert when a date was actually returned :)
  let $created := PDDocumentInformation:getCreationDate($info)
  return map{
    "title": PDDocumentInformation:getTitle($info),
    "creator": PDDocumentInformation:getCreator($info),
    "producer": PDDocumentInformation:getProducer($info),
    "subject": PDDocumentInformation:getSubject($info),
    "keywords": PDDocumentInformation:getKeywords($info),
    "creationdate": if (exists($created)) then pdfbox:gregToISO($created) else (),
    "author": PDDocumentInformation:getAuthor($info)
  }
};
|
||||
|
||||
(:~
 Render a java.util.GregorianCalendar as a string.
 The calendar is converted with GregorianCalendar.toZonedDateTime() and the
 resulting object is passed to string().
 @param $item calendar object
 @return string value of the zoned date-time
:)
declare function pdfbox:gregToISO($item as item())
as xs:string{
  let $zoned := Q{java:java.util.GregorianCalendar}toZonedDateTime($item)
  return string($zoned)
};
|
||||
|
||||
(:~ outline for $doc as map()* :)
|
||||
declare function pdfbox:outline($doc as item())
|
||||
as map(*)*{
|
||||
(# db:wrapjava some #) {
|
||||
let $bookmark:=
|
||||
let $outline:=
|
||||
PDDocument:getDocumentCatalog($doc)
|
||||
=>PDDocumentCatalog:getDocumentOutline()
|
||||
=>PDOutlineItem:getFirstChild()
|
||||
|
||||
let $bk:=pdfbox:outline($doc,$bookmark)
|
||||
return $bk
|
||||
|
||||
return if(exists($outline))
|
||||
then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline))
|
||||
}
|
||||
};
|
||||
|
||||
(: return bookmark info for children of $outlineItem as seq of maps :)
|
||||
(:~ return bookmark info for children of $outlineItem as seq of maps :)
|
||||
declare function pdfbox:outline($doc as item(),$outlineItem as item()?)
|
||||
as map(*)*
|
||||
{
|
||||
let $find:=hof:until(
|
||||
|
||||
as map(*)*{
|
||||
let $find as map(*):=pdfbox:_outline($doc ,$outlineItem)
|
||||
return map:get($find,"list")
|
||||
};
|
||||
|
||||
(: BaseX bug 10.7? error if inlined in outline :)
|
||||
declare function pdfbox:_outline($doc as item(),$outlineItem as item()?)
|
||||
as map(*){
|
||||
hof:until(
|
||||
function($output) { empty($output?this) },
|
||||
function($input ) {
|
||||
let $bk:= pdfbox:bookmark($input?this,$doc)
|
||||
|
|
@ -112,10 +139,9 @@ as map(*)*
|
|||
"this": PDOutlineItem:getNextSibling($input?this)}
|
||||
},
|
||||
map{"list":(),"this":$outlineItem}
|
||||
)
|
||||
return $find?list
|
||||
)
|
||||
};
|
||||
|
||||
(:~ outline as xml :)
|
||||
declare function pdfbox:outline-xml($outline as map(*)*)
|
||||
as element(outline){
|
||||
element outline {
|
||||
|
|
@ -175,19 +201,26 @@ as xs:string
|
|||
};
|
||||
|
||||
|
||||
(:~ pageLabel for every page
|
||||
(:~ pageLabel info
|
||||
@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples
|
||||
@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files
|
||||
:)
|
||||
declare function pdfbox:getPageLabels($doc as item())
as item()
{
  (: the page labels object is read from the document catalog of $doc :)
  (: NOTE(review): if the catalog has no page labels the declared item()
     return type cannot be satisfied - confirm how callers should handle that :)
  let $catalog := PDDocument:getDocumentCatalog($doc)
  return PDDocumentCatalog:getPageLabels($catalog)
};
|
||||
|
||||
(:~
 Page label for every page of $doc.
 Reads the page labels object from the document catalog and returns the
 result of PDPageLabels:getLabelsByPageIndices.
 When the catalog has no page labels object the empty sequence is returned
 instead of failing on the chained Java call.
 @param $doc PDDocument object, e.g. from pdfbox:open
 @return sequence of label strings, possibly empty
:)
declare function pdfbox:pageLabels($doc as item())
as xs:string*
{
  let $labels :=
    PDDocument:getDocumentCatalog($doc)
    =>PDDocumentCatalog:getPageLabels()
  return
    if (exists($labels))
    then PDPageLabels:getLabelsByPageIndices($labels)
    else ()
};
|
||||
|
||||
(: text on $pageNo :)
|
||||
(:~ return text on $pageNo :)
|
||||
declare function pdfbox:getText($doc as item(), $pageNo as xs:integer)
|
||||
as xs:string{
|
||||
let $tStripper := (# db:wrapjava instance #) {
|
||||
|
|
@ -198,4 +231,19 @@ as xs:string{
|
|||
return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
|
||||
};
|
||||
|
||||
(:~
 Summary map for the PDF at $pdfpath.
 Combines "file" (the path as given), "pages" (pdfbox:page-count) and
 "outline" (number of items returned by pdfbox:outline) with every entry
 of pdfbox:information, merged into a single map.
 @param $pdfpath path passed to pdfbox:open
 @return merged summary map
:)
declare function pdfbox:report($pdfpath as xs:string)
as map(*){
  let $doc := pdfbox:open($pdfpath)
  let $summary := map{
    "file": $pdfpath,
    "pages": pdfbox:page-count($doc),
    "outline": count(pdfbox:outline($doc))
  }
  return map:merge(($summary, pdfbox:information($doc)))
};
|
||||
|
||||
(:~
 @TODO render page $pageNo of $doc as an image. Not implemented yet.
 Planned PDFBox call, kept from the original stub:
   BufferedImage image = pdfRenderer.renderImageWithDPI(i, 200, ImageType.RGB)
 Until then the function raises pdfbox:not-implemented, because an empty
 body can never satisfy the declared item() return type.
 @param $doc PDDocument object
 @param $pageNo page number
 @error pdfbox:not-implemented always
:)
declare function pdfbox:pageAsImage($doc as item(), $pageNo as xs:integer)
as item(){
  error(
    xs:QName("pdfbox:not-implemented"),
    "pdfbox:pageAsImage is not implemented yet"
  )
};
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
(: test use of pageIndex :)
|
||||
import module namespace pdfbox="urn:expkg-zone58:pdfbox:3" at "../src/lib/pdfbox3.xqm";
|
||||
import module namespace pagenos = 'urn:pageno' at "../src/lib/pageno.xqm";
|
||||
import module namespace pdfbox="urn:expkg-zone58:pdfbox3" at "../lib/pdfbox3.xqm";
|
||||
|
||||
declare variable $base:=file:base-dir();
|
||||
declare function local:go($doc,$pdf as element(pdf)){
|
||||
let $range:=$pdf/@pages/tokenize(.,"–")
|
||||
|
|
@ -11,7 +11,7 @@ declare function local:go($doc,$pdf as element(pdf)){
|
|||
};
|
||||
let $src:="257107---Book_File-Web_PDF_9798400691218_486731.pdf"=>file:resolve-path($base)
|
||||
let $doc:=pdfbox:open($src)
|
||||
let $labels:= pdfbox:getPageLabels($doc)
|
||||
let $labels:= pdfbox:pageLabels($doc)
|
||||
let $pdfs:=doc("pdfs\chunks-docbook.xml")/chunks/pdf
|
||||
for $pdf in $pdfs
|
||||
let $range:=$pdf/@pages/tokenize(.,"–")
|
||||
|
|
|
|||
|
|
@ -8,15 +8,17 @@ declare variable $samples:= map{
|
|||
"climate": "data\drop-01d\set\2-6-1\A5579C_1\271989---Book_File-Web_PDF_9798400627484_486728.pdf",
|
||||
"women": "data\drop-01d\set\2-6-1\A6229C_1\257334---Book_File-Web_PDF_9798216172628_486742.pdf",
|
||||
"genocide": "data\drop1-pdf\GR2967-TRD\272791---Book_File-Web_PDF_9798400640216_486366.pdf",
|
||||
"world": "data\drop-01c\gpg-book\2-6\A3506C-TRD\256186---Book_File-Web_PDF_9798216038955_486148.pdf",
|
||||
"dummy": "lib\abc-clio-dummy.pdf"
|
||||
"world": "data\drop-01c\gpg-book\2-6\A3506C-TRD\256186---Book_File-Web_PDF_9798216038955_486148.pdf"
|
||||
};
|
||||
declare variable $base:= "C:\Users\mrwhe\git\bloomsbury\content-architecture\xquery\ABC-CLIO\data";
|
||||
(:~ resolve :)
|
||||
declare variable $PDF:= $samples?women=>file:resolve-path($base);
|
||||
declare variable $PDF:=
|
||||
$samples?world=>file:resolve-path($base)
|
||||
(: "C:\Users\mrwhe\git\expkg-zone58\pdfbox\samples.pdf\icelandic-dictionary.pdf" :)
|
||||
;
|
||||
|
||||
|
||||
|
||||
let $doc:=pdfbox:open($PDF)
|
||||
return pdfbox:outline($doc)=>pdfbox:outline-xml()
|
||||
(: return pdfbox:extract($doc,"c:\tmp\junk3.pdf",1,pdfbox:page-count($doc)) :)
|
||||
|
||||
return pdfbox:information($doc)
|
||||
69
src/test/test.xqm
Normal file
69
src/test/test.xqm
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
(:~ tests for pdfbox3
|
||||
|
||||
:)
|
||||
module namespace test="urn:expkg-zone58:pdfbox3:tests";
|
||||
import module namespace pdfbox="urn:expkg-zone58:pdfbox3" at "../lib/pdfbox3.xqm";
|
||||
|
||||
declare variable $test:base:=file:base-dir()=>file:parent()=>file:parent();
|
||||
|
||||
(:~ the version reported by pdfbox:version() starts with "3.0" :)
declare %unit:test
function test:pdfbox-version(){
  let $version := pdfbox:version()
  return unit:assert(starts-with($version, "3.0"))
};
|
||||
|
||||
(:~ the BaseX100 sample is expected to report 521 pages :)
declare %unit:test
function test:page-count(){
  let $doc := pdfbox:open(test:resolve("samples.pdf/BaseX100.pdf"))
  return unit:assert-equals(pdfbox:page-count($doc), 521)
};
|
||||
|
||||
(:~ pdfbox:outline is expected to be empty for the BaseX100 sample :)
declare %unit:test
function test:outline-none(){
  let $doc := pdfbox:open(test:resolve("samples.pdf/BaseX100.pdf"))
  return unit:assert(empty(pdfbox:outline($doc)))
};
|
||||
|
||||
(:~ pdfbox:outline is expected to be non-empty for the icelandic-dictionary sample :)
declare %unit:test
function test:outline-present(){
  let $doc := pdfbox:open(test:resolve("samples.pdf/icelandic-dictionary.pdf"))
  return unit:assert(exists(pdfbox:outline($doc)))
};
|
||||
|
||||
(:~ the XML outline of the icelandic-dictionary sample is expected to have 31 bookmark children :)
declare %unit:test
function test:outline-xml(){
  let $doc := pdfbox:open(test:resolve("samples.pdf/icelandic-dictionary.pdf"))
  let $xml := pdfbox:outline-xml(pdfbox:outline($doc))
  return unit:assert-equals(count($xml/bookmark), 31)
};
|
||||
|
||||
(:~ BaseX100 sample: label 1 is expected to be "i" and label 27 to be "1" :)
declare %unit:test
function test:pagelabels(){
  let $doc := pdfbox:open(test:resolve("samples.pdf/BaseX100.pdf"))
  let $labels := pdfbox:pageLabels($doc)
  return (
    unit:assert($labels[1] = "i"),
    unit:assert($labels[27] = "1")
  )
};
|
||||
|
||||
(:~
 Smoke test: calls pdfbox:extract with arguments 2, 12 and a temp file path
 on the BaseX100 sample. Only checks that the call completes; the assertion
 itself is unconditional.
:)
declare %unit:test
function test:save(){
  let $dest := trace(file:create-temp-file("test", ".pdf"), "DEST: ")
  let $source := test:resolve("samples.pdf/BaseX100.pdf")
  let $result := pdfbox:extract(pdfbox:open($source), 2, 12, $dest)
  return unit:assert(true())
};
|
||||
|
||||
(:~ pdfbox:getText with page argument 1 on the BaseX100 sample is expected to start with "BaseX Documentation" :)
declare %unit:test
function test:page-text(){
  let $doc := pdfbox:open(test:resolve("samples.pdf/BaseX100.pdf"))
  let $text := pdfbox:getText($doc, 1)
  return unit:assert(starts-with($text, "BaseX Documentation"))
};
|
||||
|
||||
(:~ resolve $file against $test:base :)
declare function test:resolve($file as xs:string){
  $file => file:resolve-path($test:base)
};
|
||||
|
|
@ -8,7 +8,7 @@ declare
|
|||
%rest:path('/pdf/api/sources')
|
||||
%output:method("json")
|
||||
%output:json("format=xquery")
|
||||
function api:apt()
|
||||
function api:apt() as map(*)
|
||||
{
|
||||
let $base:="C:/Users/mrwhe/git/expkg-zone58/pdfbox/data/"
|
||||
let $d:="1e/"
|
||||
|
|
@ -18,6 +18,7 @@ function api:apt()
|
|||
"items": array{$f!api:path-info(.)}
|
||||
}
|
||||
};
|
||||
|
||||
declare function api:path-info($file as xs:string)
|
||||
as map(*)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,6 +1,13 @@
|
|||
Uses
|
||||
* https://github.com/blikblum/slick-router#readme
|
||||
* https://dev.to/blikblum/slick-router-a-powerful-router-for-web-components-3fck
|
||||
|
||||
## Sync
|
||||
```
|
||||
cd C:\Users\mrwhe\git\expkg-zone58\pdfbox\src\webapp\pdf
|
||||
|
||||
c:\DeltaCopy\rsync -rlptz --progress --exclude=.git --exclude=.vscode . andy@localhost::basexserv/
|
||||
```
|
||||
## random html in markdown
|
||||
<style>table, th, td {
|
||||
border: 1px solid black;
|
||||
|
|
|
|||
|
|
@ -14,6 +14,11 @@ customElements.define('application-view',
|
|||
}
|
||||
}
|
||||
})
|
||||
this.addEventListener('load', e => {
|
||||
const data = e.detail;
|
||||
notify(JSON.stringify(data.items[0]));
|
||||
}
|
||||
)
|
||||
// Custom function to emit toast notifications
|
||||
function notify(message, variant = 'primary', icon = 'info-circle', duration = 3000) {
|
||||
const alert = Object.assign(document.createElement('sl-alert'), {
|
||||
|
|
@ -83,7 +88,6 @@ customElements.define('application-view',
|
|||
)
|
||||
customElements.define('home-view',
|
||||
class HomeView extends withRouterLinks(HTMLElement) {
|
||||
|
||||
connectedCallback() {
|
||||
this.getModel();
|
||||
}
|
||||
|
|
@ -100,13 +104,13 @@ customElements.define('home-view',
|
|||
}
|
||||
renderPosts(data) {
|
||||
const count = data.count
|
||||
const shadowRoot = this.attachShadow({ mode: "closed" });
|
||||
const shadowRoot = this.attachShadow({ mode: "open" });
|
||||
const div = document.createElement("div", { class: "cards" });
|
||||
shadowRoot.appendChild(div);
|
||||
data.items.forEach(item => {
|
||||
shadowRoot.appendChild(Object.assign(
|
||||
document.createElement('sl-card'), {
|
||||
textContent: item.slug
|
||||
}
|
||||
))
|
||||
div.appendChild(Object.assign(
|
||||
document.createElement('sl-card'), { class: "card", textContent: item.slug })
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -189,7 +193,8 @@ customElements.define('settings-view',
|
|||
<sl-icon slot="icon" name="gear"></sl-icon>
|
||||
<strong>Your settings have been updated</strong><br />
|
||||
Settings will take effect on next login.
|
||||
</sl-alert>
|
||||
</sl-alert>
|
||||
<fetch-json src='/pdf/api/sources'/>
|
||||
</div>
|
||||
`
|
||||
}
|
||||
|
|
@ -213,6 +218,35 @@ customElements.define('profile-view',
|
|||
|
||||
customElements.define('profile-index-view',
  /**
   * Profile landing view: shows an "<user> profile" heading, where <user>
   * is read from the route parameter `this.$route.params.user`.
   */
  class ProfileIndexView extends HTMLElement {

    connectedCallback() {
      // The user name comes from the route, so it is written with
      // textContent rather than interpolated into an HTML string; markup
      // in the parameter is shown literally instead of being parsed.
      const wrapper = document.createElement('div');
      wrapper.className = 'ProfileIndex';

      const heading = document.createElement('h2');
      heading.textContent = `${this.$route.params.user} profile`;

      wrapper.appendChild(heading);
      this.replaceChildren(wrapper);
    }
  }
)
|
||||
customElements.define('cards-panel',
|
||||
class CardPanel extends HTMLElement {
|
||||
constructor(){
|
||||
super();
|
||||
const template = document.createElement('template');
|
||||
template.id = 'pool-calculator-template';
|
||||
template.innerHTML = `
|
||||
<style>
|
||||
|
||||
</style>
|
||||
|
||||
<div class="input-section">
|
||||
|
||||
<!-- ... -->
|
||||
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
connectedCallback() {
|
||||
this.innerHTML = `
|
||||
<div class='ProfileIndex'>
|
||||
|
|
@ -222,4 +256,35 @@ customElements.define('profile-index-view',
|
|||
}
|
||||
}
|
||||
)
|
||||
customElements.define('fetch-json',
  /**
   * <fetch-json src="..."> fetches JSON from `src` when connected, stores
   * the parsed result on `this.data`, shows "<src> : <count>" and
   * dispatches a bubbling, composed "load" CustomEvent whose `detail` is
   * the parsed JSON.
   */
  class FetchJson extends HTMLElement {
    static observedAttributes = ["src", "size"];

    connectedCallback() {
      // getModel() rejects on network or JSON parse failure; report it here
      // so the rejection is not left unhandled.
      this.getModel().catch((error) => {
        console.error('fetch-json: failed to load', this.getAttribute('src'), error);
      });
    }

    /**
     * Fetch and render the JSON referenced by the `src` attribute.
     * @returns {Promise<void>} resolves after rendering, rejects with the
     *   fetch/parse error.
     */
    getModel() {
      const src = this.getAttribute('src')
        + "?" + new URLSearchParams({ foo: 'value', bar: 2, });
      // fetch() already returns a promise, so no extra Promise wrapper is needed.
      return fetch(src)
        .then((response) => response.json())
        .then((json) => {
          // Keep the parsed payload. The previous code referenced an
          // undefined `data` here, which threw before anything rendered.
          this.data = json;
          this.renderPosts(json);
        });
    }

    /**
     * Show the source and item count, then announce the data via "load".
     * @param {object} data parsed JSON; `data.count` is displayed.
     */
    renderPosts(data) {
      const label = document.createElement('span');
      // textContent keeps attribute and server values from being parsed as HTML
      label.textContent = `${this.getAttribute('src')} : ${data.count}`;
      this.replaceChildren(label);

      this.dispatchEvent(new CustomEvent("load", {
        detail: data,
        composed: true,
        bubbles: true
      }));
    }
  }
)
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@
|
|||
<link rel="stylesheet" href="animations.css" />
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
</body>
|
||||
|
||||
|
|
|
|||
|
|
@ -73,3 +73,21 @@ html {
|
|||
router-outlet > * {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.card-header {
|
||||
max-width: 300px;
|
||||
}
|
||||
|
||||
.card-header [slot='header'] {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.card-header h3 {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.card-header sl-icon-button {
|
||||
font-size: var(--sl-font-size-medium);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue