[add] image save
This commit is contained in:
parent
bd5b7cf006
commit
af2fac39b2
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@ -1,5 +1,5 @@
|
||||
{
|
||||
"basexTools.xquery.profile": "basex-10",
|
||||
"basexTools.xquery.showHovers": false,
|
||||
"basexTools.xquery.showHovers": true,
|
||||
|
||||
}
|
@ -1 +1 @@
|
||||
{"cells":[{"kind":1,"language":"markdown","value":"# PDFBox3 \r\nA BaseX 10+ interface to Apache PDFBox® library version 3 \r\n## Apache PDFBox® - A Java PDF Library\r\n\r\nThe Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.\r\nhttps://pdfbox.apache.org/"},{"kind":1,"language":"markdown","value":"It comes with the useful PDF debug tool `java -jar debugger-app-3.0.1.jar`"},{"kind":1,"language":"markdown","value":"## Set up XQuery context for following code..."},{"kind":2,"language":"xquery","value":"(:<:)(: XQuery Context :)\r\nimport module namespace pdfbox = \"urn:expkg-zone58:pdfbox3\" at \"../src/lib/pdfbox3.xqm\";\r\nimport module namespace bookpages = 'urn:bookpages' at \"../src/lib/bookpages.xqm\";\r\nimport module namespace config = 'urn:abc-clio:config' at 'C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO/lib/abc-config.xqm';\r\n\r\ndeclare variable $samples:= map{\r\n \"climate\": \"drop-01d\\set\\2-6-1\\A5579C_1\\271989---Book_File-Web_PDF_9798400627484_486728.pdf\",\r\n \"women\": \"drop-01d\\set\\2-6-1\\A6229C_1\\257334---Book_File-Web_PDF_9798216172628_486742.pdf\",\r\n \"genocide\": \"drop1-pdf\\GR2967-TRD\\272791---Book_File-Web_PDF_9798400640216_486366.pdf\",\r\n \"world\": \"drop-01c\\gpg-book\\2-6\\A3506C-TRD\\256186---Book_File-Web_PDF_9798216038955_486148.pdf\"\r\n};\r\ndeclare variable $PDF:= (: $samples?women=>file:resolve-path($config:data) :)\r\n\"C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO\\data\\drop-01e\\set\\2-6-1\\A5690C_1\\257107---Book_File-Web_PDF_9798400691218_486731.pdf\";"},{"kind":1,"language":"markdown","value":"# Version"},{"kind":1,"language":"markdown","value":" ## pdfbox version"},{"kind":2,"language":"xquery","value":"pdfbox:version()"},{"kind":1,"language":"markdown","value":"PDF specification version used by document"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:pdfVersion()"},{"kind":1,"language":"markdown","value":"# Page count for PDF"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:page-count()"},{"kind":1,"language":"markdown","value":"## save range to new pdf"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:extract(2,12,\"c:\\tmp\\a.pdf\")"},{"kind":1,"language":"markdown","value":"## Outline / bookmarks"},{"kind":1,"language":"markdown","value":"### sequence of maps"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:outline()"},{"kind":1,"language":"markdown","value":"XML"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()"},{"kind":1,"language":"markdown","value":"## Page labels"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:pageLabels()"},{"kind":1,"language":"markdown","value":"# getText from page index"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfbox:getText($doc,56)"},{"kind":1,"language":"markdown","value":"## PageNo text analysis"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pagenos:page-report($doc)\r\n"},{"kind":1,"language":"markdown","value":"# Inverted pageno map"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pagenos:page-report($doc)=>pagenos:inverted-map()"},{"kind":1,"language":"markdown","value":"## report"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nlet $doc:=pdfbox:open(file:resolve-path($f,$a))\r\n(: let $outline:=pdfbox:outline($doc) :)\r\nlet $count:=pdfbox:page-count($doc)\r\norder by $count \r\nreturn ``[`{$f}`: `{ $count }`]``"}]}
|
||||
{"cells":[{"kind":1,"language":"markdown","value":"# PDFBox3 \r\nA BaseX 10+ interface to Apache PDFBox® library version 3 \r\n## Apache PDFBox® - A Java PDF Library\r\n\r\nThe Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.\r\nhttps://pdfbox.apache.org/"},{"kind":1,"language":"markdown","value":"It comes with the useful PDF debug tool `java -jar debugger-app-3.0.1.jar`"},{"kind":1,"language":"markdown","value":"## Set up XQuery context for following code..."},{"kind":2,"language":"xquery","value":"(:<:)(: XQuery Context :)\r\nimport module namespace pdfbox = \"urn:expkg-zone58:pdfbox3\" at \"../src/lib/pdfbox3.xqm\";\r\nimport module namespace bookpages = 'urn:bookpages' at \"../src/lib/bookpages.xqm\";\r\nimport module namespace pdfscrape = 'urn:pdfscrape' at \"../src/lib/pdfscrape.xqm\";\r\nimport module namespace config = 'urn:abc-clio:config' at 'C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO/lib/abc-config.xqm';\r\n\r\ndeclare variable $samples:= map{\r\n \"climate\": \"drop-01d\\set\\2-6-1\\A5579C_1\\271989---Book_File-Web_PDF_9798400627484_486728.pdf\",\r\n \"women\": \"drop-01d\\set\\2-6-1\\A6229C_1\\257334---Book_File-Web_PDF_9798216172628_486742.pdf\",\r\n \"genocide\": \"drop1-pdf\\GR2967-TRD\\272791---Book_File-Web_PDF_9798400640216_486366.pdf\",\r\n \"world\": \"drop-01c\\gpg-book\\2-6\\A3506C-TRD\\256186---Book_File-Web_PDF_9798216038955_486148.pdf\"\r\n};\r\ndeclare variable $PDF:= (: $samples?women=>file:resolve-path($config:data) :)\r\n\"C:\\Users\\mrwhe\\git\\bloomsbury\\content-architecture\\xquery\\ABC-CLIO\\data\\drop-01e\\set\\2-6-1\\A5690C_1\\257107---Book_File-Web_PDF_9798400691218_486731.pdf\";"},{"kind":1,"language":"markdown","value":" ## Check pdfbox version"},{"kind":2,"language":"xquery","value":"pdfbox:version()"},{"kind":1,"language":"markdown","value":"PDF specification version used by document"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:pdfVersion()"},{"kind":1,"language":"markdown","value":"# Page count for PDF"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:page-count()"},{"kind":1,"language":"markdown","value":"# save range to new pdf"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:extract(2,12,\"c:\\tmp\\a.pdf\")"},{"kind":1,"language":"markdown","value":"## Outline / bookmarks"},{"kind":1,"language":"markdown","value":"### sequence of maps"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:outline()"},{"kind":1,"language":"markdown","value":"XML"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)=>pdfbox:outline()=>pdfbox:outline-xml()"},{"kind":1,"language":"markdown","value":"## Page labels"},{"kind":2,"language":"xquery","value":"\r\npdfbox:open($PDF)=>pdfbox:pageLabels()"},{"kind":1,"language":"markdown","value":"# getText from page index"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfbox:getText($doc,56)"},{"kind":1,"language":"markdown","value":"# Page scraping"},{"kind":1,"language":"markdown","value":"## pdf scrape text analysis"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)\r\n"},{"kind":1,"language":"markdown","value":"## Inverted pageno map"},{"kind":2,"language":"xquery","value":"let $doc:=pdfbox:open($PDF)\r\nreturn pdfscrape:page-report($doc)=>pdfscrape:inverted-map()"},{"kind":1,"language":"markdown","value":"# Save images"},{"kind":2,"language":"xquery","value":"pdfbox:open($PDF)\r\n=> pdfbox:pageAsImage(0,0.25)\r\n=> pdfbox:imageSave(\"c:\\tmp\\page0.gif\",\"gif\")"},{"kind":1,"language":"markdown","value":"## report"},{"kind":2,"language":"xquery","value":"declare variable $a:=file:resolve-path(\"../data/1e/\",file:base-dir());\r\n\r\nfor $f in file:list($a,true(),\"*.pdf\") \r\nwhere not(contains($f,\"outputs\"))\r\nlet $doc:=pdfbox:open(file:resolve-path($f,$a))\r\n(: let $outline:=pdfbox:outline($doc) :)\r\nlet $count:=pdfbox:page-count($doc)\r\norder by $count \r\nreturn ``[`{$f}`: `{ $count }`]``"}]}
|
@ -1 +1,5 @@
|
||||
Examples with pageLabels and outlines
|
||||
# Example PDFs with pageLabels and outlines
|
||||
|
||||
* [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf)
|
||||
* [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf)
|
||||
* [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers.pdf)
|
||||
|
@ -40,7 +40,7 @@ declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocum
|
||||
@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html
|
||||
:)
|
||||
declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem";
|
||||
|
||||
declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
|
||||
declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
|
||||
declare namespace File ="java:java.io.File";
|
||||
|
||||
@ -220,6 +220,7 @@ as xs:string*
|
||||
=>PDDocumentCatalog:getPageLabels()
|
||||
=>PDPageLabels:getLabelsByPageIndices()
|
||||
};
|
||||
|
||||
(:~ return text on $pageNo :)
|
||||
declare function pdfbox:getText($doc as item(), $pageNo as xs:integer)
|
||||
as xs:string{
|
||||
@ -231,6 +232,7 @@ as xs:string{
|
||||
return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
|
||||
};
|
||||
|
||||
(:~ summary info as map for $pdfpath :)
|
||||
declare function pdfbox:report($pdfpath as xs:string)
|
||||
as map(*){
|
||||
let $doc:=pdfbox:open($pdfpath)
|
||||
@ -242,8 +244,17 @@ as map(*){
|
||||
)=>map:merge()
|
||||
};
|
||||
|
||||
(: @TODO :)
|
||||
declare function pdfbox:pageAsImage($doc as item(), $pageNo as xs:integer)
|
||||
(:~ page (ZERO based) as image
|
||||
@param $scale 1=72 dpi :)
|
||||
declare function pdfbox:pageAsImage($doc as item(), $pageNo as xs:integer,$scale as xs:float)
|
||||
as item(){
|
||||
(: BufferedImage image = pdfRenderer.renderImageWithDPI(i, 200, ImageType.RGB) :)
|
||||
PDFRenderer:new($doc)
|
||||
=>PDFRenderer:renderImage($pageNo,$scale)
|
||||
};
|
||||
|
||||
(:~ save bufferedimage to $dest
|
||||
@param $type = "gif","png" etc:)
|
||||
declare function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string)
|
||||
as xs:boolean{
|
||||
Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, File:new($dest))
|
||||
};
|
||||
|
8
src/scratch/abc2.xq
Normal file
8
src/scratch/abc2.xq
Normal file
@ -0,0 +1,8 @@
|
||||
declare variable $samples:= map{
|
||||
"climate": "data\drop-01d\set\2-6-1\A5579C_1\271989---Book_File-Web_PDF_9798400627484_486728.pdf",
|
||||
"women": "data\drop-01d\set\2-6-1\A6229C_1\257334---Book_File-Web_PDF_9798216172628_486742.pdf",
|
||||
"genocide": "data\drop1-pdf\GR2967-TRD\272791---Book_File-Web_PDF_9798400640216_486366.pdf",
|
||||
"world": "data\drop-01c\gpg-book\2-6\A3506C-TRD\256186---Book_File-Web_PDF_9798216038955_486148.pdf"
|
||||
};
|
||||
declare variable $base:= "C:\Users\mrwhe\git\bloomsbury\content-architecture\xquery\ABC-CLIO\data";
|
||||
42
|
@ -6,6 +6,10 @@ html {
|
||||
padding: 0;
|
||||
font-family: 'Open Sans', sans-serif;
|
||||
}
|
||||
sl-card {
|
||||
display: flex;
|
||||
width: 100px;
|
||||
}
|
||||
|
||||
.App {
|
||||
width: 800px;
|
||||
|
32
src/webapp/pdf/static/user.html
Normal file
32
src/webapp/pdf/static/user.html
Normal file
@ -0,0 +1,32 @@
|
||||
<!doctype html>
|
||||
<script type="module">
|
||||
import { define, store, html } from 'https://esm.sh/hybrids@^8';
|
||||
|
||||
const User = {
|
||||
id: true,
|
||||
res: null,
|
||||
firstName: "",
|
||||
lastName: "",
|
||||
[store.connect]: {
|
||||
get: id => fetch(`http://localhost:8080/pdf/api/sources`)
|
||||
.then(data => data.json())
|
||||
.then(res=> { "43"})
|
||||
},
|
||||
};
|
||||
|
||||
define({
|
||||
tag: "user-details",
|
||||
user: store(User),
|
||||
render: ({ user }) => html`
|
||||
<div>
|
||||
${store.pending(user) && `Loading...`}
|
||||
${store.error(user) && `Something went wrong...`}
|
||||
|
||||
${store.ready(user) && html`
|
||||
<p>${user.id} ${user.lastName}</p>
|
||||
`}
|
||||
</div>
|
||||
`,
|
||||
});
|
||||
</script>
|
||||
<user-details user="2"></user-details>
|
Loading…
Reference in New Issue
Block a user