diff --git a/.gitignore b/.gitignore index adbb97d..35f13f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -data/ \ No newline at end of file +data/ +docs/xqdoc/ \ No newline at end of file diff --git a/docs/xqdoc/annotations.html b/docs/xqdoc/annotations.html deleted file mode 100644 index d702d09..0000000 --- a/docs/xqdoc/annotations.html +++ /dev/null @@ -1,11 +0,0 @@ -jars - xqDocA - xqDocA

- Project - jars -  Annotations -

Summary

This project uses 0 annotation namespaces.

Related documents
ViewDescriptionFormat
reportIndex of sourcesxhtml
restxqSummary of REST interfacexhtml
importsSummary of import usagexhtml
imports-diagProject wide module imports as html mermaid class diagramhtml5
imports-diag.mmdProject wide module imports as a mermaid class diagramtext
xqdoca.xmlxqDocA run configuration report (XML)xml
xqdoc-validatevalidate generated xqdoc filesxml

Annotations

\ No newline at end of file diff --git a/docs/xqdoc/imports.html b/docs/xqdoc/imports.html deleted file mode 100644 index 65bf09c..0000000 --- a/docs/xqdoc/imports.html +++ /dev/null @@ -1,9 +0,0 @@ -jars - xqDocA - xqDocA

Project jars -  Imports -

Summary

Lists all modules imported.

Related documents
ViewDescriptionFormat
reportIndex of sourcesxhtml
restxqSummary of REST interfacexhtml
imports-diagProject wide module imports as html mermaid class diagramhtml5
imports-diag.mmdProject wide module imports as a mermaid class diagramtext
annotationsSummary of XQuery annotation usexhtml
xqdoca.xmlxqDocA run configuration report (XML)xml
xqdoc-validatevalidate generated xqdoc filesxml

Imports (0)

\ No newline at end of file diff --git a/docs/xqdoc/index.html b/docs/xqdoc/index.html deleted file mode 100644 index 95bdac6..0000000 --- a/docs/xqdoc/index.html +++ /dev/null @@ -1,12 +0,0 @@ -jars - xqDocA - xqDocA

- Project jars -  XQuery source documentation -

Summary

The project - jars contains - 1 XQuery source files, and uses - 0 annotation namespaces. -

This document was built from source folder C:/Users/mrwhe/git/expkg-zone58/pdfbox/jars/ on - Saturday, 25th January 2025.

Related documents
ViewDescriptionFormat
reportIndex of sourcesxhtml
restxqSummary of REST interfacexhtml
importsSummary of import usagexhtml
imports-diagProject wide module imports as html mermaid class diagramhtml5
imports-diag.mmdProject wide module imports as a mermaid class diagramtext
annotationsSummary of XQuery annotation usexhtml
xqdoca.xmlxqDocA run configuration report (XML)xml
xqdoc-validatevalidate generated xqdoc filesxml

XQuery Main (0)

None

XQuery Library (1)

UriPrefixDescriptionUseAMetrics
org.expkg_zone58.Pdfbox3pdfbox
0
Library
↖0
V#0
F#24

File view (1)

Annotation namespaces (0)

A total of 0 annotations are defined. -

\ No newline at end of file diff --git a/docs/xqdoc/mermaid.html b/docs/xqdoc/mermaid.html deleted file mode 100644 index 9e73b93..0000000 --- a/docs/xqdoc/mermaid.html +++ /dev/null @@ -1,35 +0,0 @@ -Module imports diagram (Mermaid) - xqDocA
%%{init: {'securityLevel': 'loose', 'theme':'base'}}%% -classDiagram -direction TB - - class RESTXQ:::cssRest { } - class INVOKE:::cssMain { } - class TEST { } - -class pdfbox { << Pdfbox3.xqm >>} - - -link pdfbox "modules/F000001/index.html" "This is a tooltip for org.expkg_zone58.Pdfbox3" - -
\ No newline at end of file diff --git a/docs/xqdoc/mermaid.mmd b/docs/xqdoc/mermaid.mmd deleted file mode 100644 index 9ffed09..0000000 --- a/docs/xqdoc/mermaid.mmd +++ /dev/null @@ -1,13 +0,0 @@ -%%{init: {'securityLevel': 'loose', 'theme':'base'}}%% -classDiagram -direction TB - - class RESTXQ:::cssRest { } - class INVOKE:::cssMain { } - class TEST { } - -class pdfbox { << Pdfbox3.xqm >>} - - -link pdfbox "modules/F000001/index.html" "This is a tooltip for org.expkg_zone58.Pdfbox3" - diff --git a/docs/xqdoc/modules/F000001/index.html b/docs/xqdoc/modules/F000001/index.html deleted file mode 100644 index af292c1..0000000 --- a/docs/xqdoc/modules/F000001/index.html +++ /dev/null @@ -1,474 +0,0 @@ -jars - xqDocA - xqDocA

org.expkg_zone58.Pdfbox3  - library module

Summary

MISSING
Related documents
ViewDescriptionFormat
xqdocxqDoc xml file from the source modulexml
xqparsexqparse xml file from the source modulexml

Imports

- This module is imported by - 0 modules. It imports - 0 modules. -

Variables

None

Functions

4.1 pdfbox:_outline

Arities: #2

Signatures
pdfbox:_outline - ( - $doc as item(), $outlineItem as item()? ) as map(*)
Parameters
  • doc as item()
  • outlineItem as item()?
Return
  • map(*)
Invoked by 0 functions from 0 modules
    Source ( 17 lines)
    function pdfbox:_outline($doc as item(),$outlineItem as item()?)
    -as map(*){
    - hof:until(
    -            function($output) { empty($output?this) },
    -            function($input ) { 
    -                      let $bk:= pdfbox:bookmark($input?this,$doc)
    -                      let $bk:= if($bk?hasChildren)
    -                                then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this))
    -                                     return map:merge(($bk,map:entry("children",$kids)))
    -                                else $bk 
    -                      return map{
    -                            "list": ($input?list, $bk),
    -                            "this":  PDOutlineItem:getNextSibling($input?this)}
    -                          },
    -            map{"list":(),"this":$outlineItem}
    -        ) 
    -}

    4.2 pdfbox:bookmark

    Arities: #2

    Signatures
    pdfbox:bookmark - ( - $bookmark as item(), $doc as item() ) as map(*)
    Parameters
    • bookmark as item()
    • doc as item()
    Return
    • map(*)
    Invoked by 0 functions from 0 modules
      Source ( 9 lines)
      function pdfbox:bookmark($bookmark as item(),$doc as item())
      -as map(*)
      -{
      - map{ 
      -  "index":  PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc),
      -  "title":  (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("�",""),
      -  "hasChildren": PDOutlineItem:hasChildren($bookmark)
      -  }
      -}

      4.3 pdfbox:bookmark-xml

      Arities: #1

      Signatures
      pdfbox:bookmark-xml - ( - $outline as map(*)* ) as element(bookmark)*
      Parameters
      • outline as map(*)*
      Return
      • element(bookmark) *
      Invoked by 0 functions from 0 modules
        Source ( 8 lines)
        function pdfbox:bookmark-xml($outline as map(*)*)
        -as element(bookmark)*
        -{
        -  $outline!
        -  <bookmark title="{?title}" index="{?index}">
        -    {?children!pdfbox:bookmark-xml(.)}
        -  </bookmark>
        -}

        4.4 pdfbox:close

        Arities: #1

        Signatures
        pdfbox:close - ( - $doc as item() ) as empty-sequence
        Parameters
        • doc as item()
        Return
        • empty-sequence
        Invoked by 0 functions from 0 modules
          Source ( 6 lines)
          function pdfbox:close($doc as item())
          -as empty-sequence(){
          -  (# db:wrapjava void #) {
          -     PDDocument:close($doc)
          -  }
          -}

          4.5 pdfbox:extract

          Arities: #4

          Summary
          Signatures
          pdfbox:extract - ( - $doc as item(), $start as xs:integer, $end as xs:integer, $target as xs:string ) as xs:string
          Parameters
          • doc as item()
          • start as xs:integer
          • end as xs:integer
          • target as xs:string
          Return
          • xs:string
          Invoked by 0 functions from 0 modules
            Source ( 7 lines)
            function pdfbox:extract($doc as item(), 
            -             $start as xs:integer,$end as xs:integer,$target as xs:string)
            -as xs:string
            -{
            -    let $a:=PageExtractor:new($doc, $start, $end) =>PageExtractor:extract()
            -    return (pdfbox:save($a,$target),pdfbox:close($a)) 
            -}

            4.6 pdfbox:getPageLabels

            Arities: #1

            Summary
            Signatures
            pdfbox:getPageLabels - ( - $doc as item() ) as item()
            Parameters
            • doc as item()
            Return
            • item()
            Invoked by 0 functions from 0 modules
              Source ( 6 lines)
              function pdfbox:getPageLabels($doc as item())
              -as item()
              -{
              -  PDDocument:getDocumentCatalog($doc)
              -  =>PDDocumentCatalog:getPageLabels()
              -}

              4.7 pdfbox:getText

              Arities: #2

              Summary
              Signatures
              pdfbox:getText - ( - $doc as item(), $pageNo as xs:integer ) as xs:string
              Parameters
              • doc as item()
              • pageNo as xs:integer
              Return
              • xs:string
              Invoked by 0 functions from 0 modules
                Source ( 9 lines)
                function pdfbox:getText($doc as item(), $pageNo as xs:integer)
                -as xs:string{
                -  let $tStripper := (# db:wrapjava instance #) {
                -         PDFTextStripper:new()
                -         => PDFTextStripper:setStartPage($pageNo)
                -         => PDFTextStripper:setEndPage($pageNo)
                -       }
                -  return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
                -}

                4.8 pdfbox:gregToISO

                Arities: #1

                Summary
                Signatures
                pdfbox:gregToISO - ( - $item as item() ) as xs:string
                Parameters
                • item as item()
                Return
                • xs:string
                Invoked by 0 functions from 0 modules
                  Source ( 4 lines)
                  function pdfbox:gregToISO($item as item())
                  -as xs:string{
                  - Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string()
                  -}

                  4.9 pdfbox:imageBinary

                  Arities: #2

                  Summary
                  Signatures
                  pdfbox:imageBinary - ( - $bufferedImage as item(), $type as xs:string ) as xs:base64Binary
                  Parameters
                  • bufferedImage as item()
                  • type as xs:string
                  Return
                  • xs:base64Binary
                  Invoked by 0 functions from 0 modules
                    Source ( 7 lines)
                    function pdfbox:imageBinary($bufferedImage as item(),$type as xs:string)
                    -as xs:base64Binary{
                    -  let $bytes:=Q{java:java.io.ByteArrayOutputStream}new()
                    -  let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type,  $bytes)
                    -  return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes)
                    -         =>convert:integers-to-base64()
                    -}

                    4.10 pdfbox:imageSave

                    Arities: #3

                    Summary
                    Signatures
                    pdfbox:imageSave - ( - $bufferedImage as item(), $dest as xs:string, $type as xs:string ) as xs:boolean
                    Parameters
                    • bufferedImage as item()
                    • dest as xs:string
                    • type as xs:string
                    Return
                    • xs:boolean
                    Invoked by 0 functions from 0 modules
                      Source ( 4 lines)
                      function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string)
                      -as xs:boolean{
                      -  Q{java:javax.imageio.ImageIO}write($bufferedImage , $type,  File:new($dest))
                      -}

                      4.11 pdfbox:information

                      Arities: #1

                      Summary
                      Signatures
                      pdfbox:information - ( - $doc as item() ) as map(*)
                      Parameters
                      • doc as item()
                      Return
                      • map(*)
                      Invoked by 0 functions from 0 modules
                        Source ( 13 lines)
                        function pdfbox:information($doc as item())
                        -as map(*){
                        -  let $info:=PDDocument:getDocumentInformation($doc)
                        -  return map{
                        -    "title": PDDocumentInformation:getTitle($info),
                        -    "creator": PDDocumentInformation:getCreator($info),
                        -    "producer": PDDocumentInformation:getProducer($info),
                        -    "subject": PDDocumentInformation:getSubject($info),
                        -     "keywords": PDDocumentInformation:getKeywords($info),
                        -     "creationdate": pdfbox:gregToISO(PDDocumentInformation:getCreationDate($info)),
                        -    "author": PDDocumentInformation:getAuthor($info)
                        -  }
                        -}

                        4.12 pdfbox:open

                        Arities: #1

                        Summary
                        Signatures
                        pdfbox:open - ( - $pdfpath as xs:string ) as item()
                        Parameters
                        • pdfpath as xs:string
                        Return
                        • item()
                        Invoked by 0 functions from 0 modules
                          Source ( 4 lines)
                          function pdfbox:open($pdfpath as xs:string)
                          -as item(){
                          -  Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
                          -}

                          4.13 pdfbox:outline

                          Arities: #1#2

                          Summary
                          Signatures
                          pdfbox:outline - ( - $doc as item() ) as map(*)*
                          pdfbox:outline - ( - $doc as item(), $outlineItem as item()? ) as map(*)*
                          Parameters
                          • doc as item()
                          • outlineItem as item()?
                          Return
                          • map(*) *
                          Invoked by 0 functions from 0 modules
                            Source ( 17 lines)
                            function pdfbox:outline($doc as item())
                            -as map(*)*{
                            -  (# db:wrapjava some #) {
                            -  let $outline:=
                            -                PDDocument:getDocumentCatalog($doc)
                            -                =>PDDocumentCatalog:getDocumentOutline()
                            - 
                            -  return  if(exists($outline))
                            -          then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) 
                            -  }
                            -}
                            function pdfbox:outline($doc as item(),$outlineItem as item()?)
                            -
                            -as map(*)*{
                            -  let $find as map(*):=pdfbox:_outline($doc ,$outlineItem)
                            -  return map:get($find,"list")
                            -}

                            4.14 pdfbox:outline-xml

                            Arities: #1

                            Summary
                            Signatures
                            pdfbox:outline-xml - ( - $outline as map(*)* ) as element(outline)
                            Parameters
                            • outline as map(*)*
                            Return
                            • element(outline)
                            Invoked by 0 functions from 0 modules
                              Source ( 6 lines)
                              function pdfbox:outline-xml($outline as map(*)*)
                              -as element(outline){
                              - element outline { 
                              -   $outline!pdfbox:bookmark-xml(.)
                              - }
                              -}

                              4.15 pdfbox:outx

                              Arities: #2

                              Signatures
                              pdfbox:outx - ( - $page, $document )
                              Parameters
                              • page as 
                              • document as 
                              Return
                              Invoked by 0 functions from 0 modules
                                Source ( 6 lines)
                                function pdfbox:outx($page ,$document)
                                -{
                                -  let $currentPage := PDOutlineItem:findDestinationPage($page,$document)
                                -  let $pageNumber := pdfbox:pageIndex($currentPage,$document)
                                -  return $pageNumber
                                -}

                                4.16 pdfbox:page-count

                                Arities: #1

                                Signatures
                                pdfbox:page-count - ( - $doc as item() ) as xs:integer
                                Parameters
                                • doc as item()
                                Return
                                • xs:integer
                                Invoked by 0 functions from 0 modules
                                  Source ( 4 lines)
                                  function pdfbox:page-count($doc as item())
                                  -as xs:integer{
                                  -  PDDocument:getNumberOfPages($doc)
                                  -}

                                  4.17 pdfbox:pageBufferedImage

                                  Arities: #3

                                  Summary
                                  Signatures
                                  pdfbox:pageBufferedImage - ( - $doc as item(), $pageNo as xs:integer, $scale as xs:float ) as item()
                                  Parameters
                                  • doc as item()
                                  • pageNo as xs:integer
                                  • scale as xs:float
                                  Return
                                  • item()
                                  Invoked by 0 functions from 0 modules
                                    Source ( 4 lines)
                                    function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float)
                                    -as item(){
                                    - PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale)
                                    -}

                                    4.18 pdfbox:pageIndex

                                    Arities: #2

                                    Summary
                                    Signatures
                                    pdfbox:pageIndex - ( - $page as item()?, $document ) as item()?
                                    Parameters
                                    • page as item()?
                                    • document as 
                                    Return
                                    • item() ?
                                    Invoked by 0 functions from 0 modules
                                      Source ( 10 lines)
                                      function pdfbox:pageIndex(
                                      -   $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),
                                      -   $document)
                                      -as item()?
                                      -{
                                      -  if(exists($page))
                                      -  then PDDocument:getDocumentCatalog($document)
                                      -      =>PDDocumentCatalog:getPages()
                                      -      =>PDPageTree:indexOf($page)
                                      -}

                                      4.19 pdfbox:pageLabels

                                      Arities: #1

                                      Summary
                                      Signatures
                                      pdfbox:pageLabels - ( - $doc as item() ) as xs:string*
                                      Parameters
                                      • doc as item()
                                      Return
                                      • xs:string *
                                      Invoked by 0 functions from 0 modules
                                        Source ( 7 lines)
                                        function pdfbox:pageLabels($doc as item())
                                        -as xs:string*
                                        -{
                                        -  PDDocument:getDocumentCatalog($doc)
                                        -  =>PDDocumentCatalog:getPageLabels()
                                        -  =>PDPageLabels:getLabelsByPageIndices()
                                        -}

                                        4.20 pdfbox:pdfVersion

                                        Arities: #1

                                        Summary
                                        Signatures
                                        pdfbox:pdfVersion - ( - $doc as item() ) as xs:float
                                        Parameters
                                        • doc as item()
                                        Return
                                        • xs:float
                                        Invoked by 0 functions from 0 modules
                                          Source ( 4 lines)
                                          function pdfbox:pdfVersion($doc as item())
                                          -as xs:float{
                                          -  PDDocument:getVersion($doc)
                                          -}

                                          4.21 pdfbox:report

                                          Arities: #1

                                          Summary
                                          Signatures
                                          pdfbox:report - ( - $pdfpath as xs:string ) as map(*)
                                          Parameters
                                          • pdfpath as xs:string
                                          Return
                                          • map(*)
                                          Invoked by 0 functions from 0 modules
                                            Source ( 10 lines)
                                            function pdfbox:report($pdfpath as xs:string)
                                            -as map(*){
                                            - let $doc:=pdfbox:open($pdfpath)
                                            - return (map{
                                            -       "file":  $pdfpath,
                                            -       "pages": pdfbox:page-count($doc),
                                            -       "outline": pdfbox:outline($doc)=>count()
                                            -        },pdfbox:information($doc)
                                            -)=>map:merge()
                                            -}

                                            4.22 pdfbox:save

                                            Arities: #2

                                            Summary
                                            Signatures
                                            pdfbox:save - ( - $doc as item(), $savepath as xs:string ) as xs:string
                                            Parameters
                                            • doc as item()
                                            • savepath as xs:string
                                            Return
                                            • xs:string
                                            Invoked by 0 functions from 0 modules
                                              Source ( 4 lines)
                                              function pdfbox:save($doc as item(),$savepath as xs:string)
                                              -as xs:string{
                                              -   PDDocument:save($doc,File:new($savepath)),$savepath
                                              -}

                                              4.23 pdfbox:version

                                              Arities: #0

                                              Summary
                                              Signatures
                                              pdfbox:version - ( - ) as xs:string
                                              Return
                                              • xs:string
                                              Invoked by 0 functions from 0 modules
                                                Source ( 4 lines)
                                                function pdfbox:version()
                                                -as xs:string{
                                                -  Q{java:org.apache.pdfbox.util.Version}getVersion()
                                                -}

                                                Namespaces

                                                The following namespaces are defined:

                                                PrefixUri
                                                converthttp://basex.org/modules/convert
                                                dbhttp://basex.org/modules/db
                                                Filejava:java.io.File
                                                hofhttp://basex.org/modules/hof
                                                Loaderjava:org.apache.pdfbox.Loader
                                                maphttp://www.w3.org/2005/xpath-functions/map
                                                PageExtractorjava:org.apache.pdfbox.multipdf.PageExtractor
                                                PDDocumentjava:org.apache.pdfbox.pdmodel.PDDocument
                                                PDDocumentCatalogjava:org.apache.pdfbox.pdmodel.PDDocumentCatalog
                                                PDDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformation
                                                PDDocumentOutlinejava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline
                                                pdfboxorg.expkg_zone58.Pdfbox3
                                                PDFRendererjava:org.apache.pdfbox.rendering.PDFRenderer
                                                PDFTextStripperjava:org.apache.pdfbox.text.PDFTextStripper
                                                PDOutlineItemjava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem
                                                PDPageLabelsjava:org.apache.pdfbox.pdmodel.common.PDPageLabels
                                                PDPageTreejava:org.apache.pdfbox.pdmodel.PDPageTree
                                                RandomAccessReadBufferedFilejava:org.apache.pdfbox.io.RandomAccessReadBufferedFile
                                                xshttp://www.w3.org/2001/XMLSchema

                                                6 RestXQ

                                                None

                                                Source Code

                                                xquery version '3.1';
                                                -(:~ 
                                                -pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library, 
                                                -requires pdfbox jar on classpath, tested with pdfbox-app-3.0.3.jar
                                                -@see download https://pdfbox.apache.org/download.cgi
                                                -@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.3/
                                                -
                                                -:)
                                                -module namespace pdfbox="org.expkg_zone58.Pdfbox3";
                                                -
                                                -declare namespace Loader ="java:org.apache.pdfbox.Loader"; 
                                                -declare namespace PDFTextStripper = "java:org.apache.pdfbox.text.PDFTextStripper";
                                                -
                                                -(:~ 
                                                -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDDocument.html 
                                                -:)
                                                -declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument";
                                                -
                                                -declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog";
                                                -declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels";
                                                -
                                                -(:~ 
                                                -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/multipdf/PageExtractor.html 
                                                -:)
                                                -declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor";
                                                - 
                                                -(:~ 
                                                - @see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDPageTree.html
                                                -:)
                                                -declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree";
                                                -
                                                -(:~ 
                                                -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html 
                                                -:)
                                                -declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline";
                                                -
                                                -declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation";
                                                -(:~ 
                                                -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html 
                                                -:)
                                                -declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem";
                                                -declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer";
                                                -declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile";
                                                -declare namespace File ="java:java.io.File";
                                                -
                                                -(:~ version of pdfbox:)
                                                -declare function pdfbox:version()
                                                -as xs:string{
                                                -  Q{java:org.apache.pdfbox.util.Version}getVersion()
                                                -};
                                                -
                                                -(:~ open pdf, returns handle :)
                                                -declare function pdfbox:open($pdfpath as xs:string)
                                                -as item(){
                                                -  Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath))
                                                -};
                                                -
                                                -(:~ the PDF specification version this document conforms to.:)
                                                -declare function pdfbox:pdfVersion($doc as item())
                                                -as xs:float{
                                                -  PDDocument:getVersion($doc)
                                                -};
                                                -
                                                -(:~ save pdf $doc to $savepath , returns $savepath :)
                                                -declare function pdfbox:save($doc as item(),$savepath as xs:string)
                                                -as xs:string{
                                                -   PDDocument:save($doc,File:new($savepath)),$savepath
                                                -};
                                                -
                                                -declare function pdfbox:close($doc as item())
                                                -as empty-sequence(){
                                                -  (# db:wrapjava void #) {
                                                -     PDDocument:close($doc)
                                                -  }
                                                -};
                                                -
                                                -declare function pdfbox:page-count($doc as item())
                                                -as xs:integer{
                                                -  PDDocument:getNumberOfPages($doc)
                                                -};
                                                -
                                                -(:~ map with document metadata :)
                                                -declare function pdfbox:information($doc as item())
                                                -as map(*){
                                                -  let $info:=PDDocument:getDocumentInformation($doc)
                                                -  return map{
                                                -    "title": PDDocumentInformation:getTitle($info),
                                                -    "creator": PDDocumentInformation:getCreator($info),
                                                -    "producer": PDDocumentInformation:getProducer($info),
                                                -    "subject": PDDocumentInformation:getSubject($info),
                                                -     "keywords": PDDocumentInformation:getKeywords($info),
                                                -     "creationdate": pdfbox:gregToISO(PDDocumentInformation:getCreationDate($info)),
                                                -    "author": PDDocumentInformation:getAuthor($info)
                                                -  }
                                                -};
                                                -
                                                - (:~ convert date :)
                                                -declare
                                                -function pdfbox:gregToISO($item as item())
                                                -as xs:string{
                                                - Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string()
                                                -};
                                                -
                                                -(:~ outline for $doc as map()* :)
                                                -declare function pdfbox:outline($doc as item())
                                                -as map(*)*{
                                                -  (# db:wrapjava some #) {
                                                -  let $outline:=
                                                -                PDDocument:getDocumentCatalog($doc)
                                                -                =>PDDocumentCatalog:getDocumentOutline()
                                                - 
                                                -  return  if(exists($outline))
                                                -          then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) 
                                                -  }
                                                -};
                                                -
                                                -(:~ return bookmark info for children of $outlineItem as seq of maps :)
                                                -declare function pdfbox:outline($doc as item(),$outlineItem as item()?)
                                                -
                                                -as map(*)*{
                                                -  let $find as map(*):=pdfbox:_outline($doc ,$outlineItem)
                                                -  return map:get($find,"list")
                                                -};
                                                -
                                                -(: BaseX bug 10.7? error if inlined in outline :)
                                                -declare function pdfbox:_outline($doc as item(),$outlineItem as item()?)
                                                -as map(*){
                                                - hof:until(
                                                -            function($output) { empty($output?this) },
                                                -            function($input ) { 
                                                -                      let $bk:= pdfbox:bookmark($input?this,$doc)
                                                -                      let $bk:= if($bk?hasChildren)
                                                -                                then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this))
                                                -                                     return map:merge(($bk,map:entry("children",$kids)))
                                                -                                else $bk 
                                                -                      return map{
                                                -                            "list": ($input?list, $bk),
                                                -                            "this":  PDOutlineItem:getNextSibling($input?this)}
                                                -                          },
                                                -            map{"list":(),"this":$outlineItem}
                                                -        ) 
                                                -};
                                                -(:~ outline as xml :)
                                                -declare function pdfbox:outline-xml($outline as map(*)*)
                                                -as element(outline){
                                                - element outline { 
                                                -   $outline!pdfbox:bookmark-xml(.)
                                                - }
                                                -};
                                                -
                                                -declare function pdfbox:bookmark-xml($outline as map(*)*)
                                                -as element(bookmark)*
                                                -{
                                                -  $outline!
                                                -  <bookmark title="{?title}" index="{?index}">
                                                -    {?children!pdfbox:bookmark-xml(.)}
                                                -  </bookmark>
                                                -};
                                                -
                                                -(: return bookmark info for children of $outlineItem :)
                                                -declare function pdfbox:bookmark($bookmark as item(),$doc as item())
                                                -as map(*)
                                                -{
                                                - map{ 
                                                -  "index":  PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc),
                                                -  "title":  (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("�",""),
                                                -  "hasChildren": PDOutlineItem:hasChildren($bookmark)
                                                -  }
                                                -};
                                                -
                                                -declare function pdfbox:outx($page ,$document)
                                                -{
                                                -  let $currentPage := PDOutlineItem:findDestinationPage($page,$document)
                                                -  let $pageNumber := pdfbox:pageIndex($currentPage,$document)
                                                -  return $pageNumber
                                                -};
                                                -
                                                -(:~ pageIndex of $page in $document :)
                                                -declare function pdfbox:pageIndex(
                                                -   $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :),
                                                -   $document)
                                                -as item()?
                                                -{
                                                -  if(exists($page))
                                                -  then PDDocument:getDocumentCatalog($document)
                                                -      =>PDDocumentCatalog:getPages()
                                                -      =>PDPageTree:indexOf($page)
                                                -};            
                                                -
                                                -
                                                -
                                                -(:~ save new PDF doc from 1 based page range 
                                                -@return save path :)
                                                -declare function pdfbox:extract($doc as item(), 
                                                -             $start as xs:integer,$end as xs:integer,$target as xs:string)
                                                -as xs:string
                                                -{
                                                -    let $a:=PageExtractor:new($doc, $start, $end) =>PageExtractor:extract()
                                                -    return (pdfbox:save($a,$target),pdfbox:close($a)) 
                                                -};
                                                -
                                                -
                                                -(:~   pageLabel info
                                                -@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples
                                                -@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files
                                                -:)
                                                -declare function pdfbox:getPageLabels($doc as item())
                                                -as item()
                                                -{
                                                -  PDDocument:getDocumentCatalog($doc)
                                                -  =>PDDocumentCatalog:getPageLabels()
                                                -};
                                                -
                                                -(:~   pageLabel for every page:)
                                                -declare function pdfbox:pageLabels($doc as item())
                                                -as xs:string*
                                                -{
                                                -  PDDocument:getDocumentCatalog($doc)
                                                -  =>PDDocumentCatalog:getPageLabels()
                                                -  =>PDPageLabels:getLabelsByPageIndices()
                                                -};
                                                -
                                                -(:~ return text on $pageNo :)
                                                -declare function pdfbox:getText($doc as item(), $pageNo as xs:integer)
                                                -as xs:string{
                                                -  let $tStripper := (# db:wrapjava instance #) {
                                                -         PDFTextStripper:new()
                                                -         => PDFTextStripper:setStartPage($pageNo)
                                                -         => PDFTextStripper:setEndPage($pageNo)
                                                -       }
                                                -  return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)}
                                                -};
                                                -
                                                -(:~ summary info as map for $pdfpath :)
                                                -declare function pdfbox:report($pdfpath as xs:string)
                                                -as map(*){
                                                - let $doc:=pdfbox:open($pdfpath)
                                                - return (map{
                                                -       "file":  $pdfpath,
                                                -       "pages": pdfbox:page-count($doc),
                                                -       "outline": pdfbox:outline($doc)=>count()
                                                -        },pdfbox:information($doc)
                                                -)=>map:merge()
                                                -};
                                                -
                                                -(:~ java:bufferedImage for $pageNo using $scale times dpi= 72
                                                -@param $pageNo (ZERO based) 
                                                -@param $scale 1=72 dpi 
                                                -@return  Java java.awt.image.BufferedImage object
                                                -:)
                                                -declare function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float)
                                                -as item(){
                                                - PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale)
                                                -};
                                                -
                                                -(:~ save bufferedimage to $dest 
                                                -@param $type = "gif","png" etc:)
                                                -declare function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string)
                                                -as xs:boolean{
                                                -  Q{java:javax.imageio.ImageIO}write($bufferedImage , $type,  File:new($dest))
                                                -};
                                                -
                                                -(:~ return image 
                                                -@param $type = "gif","png" etc:)
                                                -declare function pdfbox:imageBinary($bufferedImage as item(),$type as xs:string)
                                                -as xs:base64Binary{
                                                -  let $bytes:=Q{java:java.io.ByteArrayOutputStream}new()
                                                -  let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type,  $bytes)
                                                -  return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes)
                                                -         =>convert:integers-to-base64()
                                                -};
                                                \ No newline at end of file diff --git a/docs/xqdoc/modules/F000001/xqdoc.xml b/docs/xqdoc/modules/F000001/xqdoc.xml deleted file mode 100644 index cae5613..0000000 --- a/docs/xqdoc/modules/F000001/xqdoc.xml +++ /dev/null @@ -1,417 +0,0 @@ -2025-01-25T21:36:51.487Z1.1org.expkg_zone58.Pdfbox3pdfboxxquery version '3.1'; -(:~ -pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library, -requires pdfbox jar on classpath, tested with pdfbox-app-3.0.3.jar -@see download https://pdfbox.apache.org/download.cgi -@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.3/ - -:) -module namespace pdfbox="org.expkg_zone58.Pdfbox3"; - -declare namespace Loader ="java:org.apache.pdfbox.Loader"; -declare namespace PDFTextStripper = "java:org.apache.pdfbox.text.PDFTextStripper"; - -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDDocument.html -:) -declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument"; - -declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog"; -declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels"; - -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/multipdf/PageExtractor.html -:) -declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor"; - -(:~ - @see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDPageTree.html -:) -declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree"; - -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html -:) -declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline"; - -declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation"; -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html -:) -declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem"; -declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer"; -declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile"; -declare namespace File ="java:java.io.File"; - -(:~ version of pdfbox:) -declare function pdfbox:version() -as xs:string{ - Q{java:org.apache.pdfbox.util.Version}getVersion() -}; - -(:~ open pdf, returns handle :) -declare function pdfbox:open($pdfpath as xs:string) -as item(){ - Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath)) -}; - -(:~ the PDF specification version this document conforms to.:) -declare function pdfbox:pdfVersion($doc as item()) -as xs:float{ - PDDocument:getVersion($doc) -}; - -(:~ save pdf $doc to $savepath , returns $savepath :) -declare function pdfbox:save($doc as item(),$savepath as xs:string) -as xs:string{ - PDDocument:save($doc,File:new($savepath)),$savepath -}; - -declare function pdfbox:close($doc as item()) -as empty-sequence(){ - (# db:wrapjava void #) { - PDDocument:close($doc) - } -}; - -declare function pdfbox:page-count($doc as item()) -as xs:integer{ - PDDocument:getNumberOfPages($doc) -}; - -(:~ map with document metadata :) -declare function pdfbox:information($doc as item()) -as map(*){ - let $info:=PDDocument:getDocumentInformation($doc) - return map{ - "title": PDDocumentInformation:getTitle($info), - "creator": PDDocumentInformation:getCreator($info), - "producer": PDDocumentInformation:getProducer($info), - "subject": PDDocumentInformation:getSubject($info), - "keywords": PDDocumentInformation:getKeywords($info), - "creationdate": pdfbox:gregToISO(PDDocumentInformation:getCreationDate($info)), - "author": PDDocumentInformation:getAuthor($info) - } -}; - - (:~ convert date :) -declare -function pdfbox:gregToISO($item as item()) -as xs:string{ - Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string() -}; - -(:~ outline for $doc as map()* :) -declare function pdfbox:outline($doc as item()) -as map(*)*{ - (# db:wrapjava some #) { - let $outline:= - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getDocumentOutline() - - return if(exists($outline)) - then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) - } -}; - -(:~ return bookmark info for children of $outlineItem as seq of maps :) -declare function pdfbox:outline($doc as item(),$outlineItem as item()?) - -as map(*)*{ - let $find as map(*):=pdfbox:_outline($doc ,$outlineItem) - return map:get($find,"list") -}; - -(: BaseX bug 10.7? error if inlined in outline :) -declare function pdfbox:_outline($doc as item(),$outlineItem as item()?) -as map(*){ - hof:until( - function($output) { empty($output?this) }, - function($input ) { - let $bk:= pdfbox:bookmark($input?this,$doc) - let $bk:= if($bk?hasChildren) - then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this)) - return map:merge(($bk,map:entry("children",$kids))) - else $bk - return map{ - "list": ($input?list, $bk), - "this": PDOutlineItem:getNextSibling($input?this)} - }, - map{"list":(),"this":$outlineItem} - ) -}; -(:~ outline as xml :) -declare function pdfbox:outline-xml($outline as map(*)*) -as element(outline){ - element outline { - $outline!pdfbox:bookmark-xml(.) - } -}; - -declare function pdfbox:bookmark-xml($outline as map(*)*) -as element(bookmark)* -{ - $outline! - <bookmark title="{?title}" index="{?index}"> - {?children!pdfbox:bookmark-xml(.)} - </bookmark> -}; - -(: return bookmark info for children of $outlineItem :) -declare function pdfbox:bookmark($bookmark as item(),$doc as item()) -as map(*) -{ - map{ - "index": PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc), - "title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("�",""), - "hasChildren": PDOutlineItem:hasChildren($bookmark) - } -}; - -declare function pdfbox:outx($page ,$document) -{ - let $currentPage := PDOutlineItem:findDestinationPage($page,$document) - let $pageNumber := pdfbox:pageIndex($currentPage,$document) - return $pageNumber -}; - -(:~ pageIndex of $page in $document :) -declare function pdfbox:pageIndex( - $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :), - $document) -as item()? -{ - if(exists($page)) - then PDDocument:getDocumentCatalog($document) - =>PDDocumentCatalog:getPages() - =>PDPageTree:indexOf($page) -}; - - - -(:~ save new PDF doc from 1 based page range -@return save path :) -declare function pdfbox:extract($doc as item(), - $start as xs:integer,$end as xs:integer,$target as xs:string) -as xs:string -{ - let $a:=PageExtractor:new($doc, $start, $end) =>PageExtractor:extract() - return (pdfbox:save($a,$target),pdfbox:close($a)) -}; - - -(:~ pageLabel info -@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples -@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files -:) -declare function pdfbox:getPageLabels($doc as item()) -as item() -{ - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getPageLabels() -}; - -(:~ pageLabel for every page:) -declare function pdfbox:pageLabels($doc as item()) -as xs:string* -{ - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getPageLabels() - =>PDPageLabels:getLabelsByPageIndices() -}; - -(:~ return text on $pageNo :) -declare function pdfbox:getText($doc as item(), $pageNo as xs:integer) -as xs:string{ - let $tStripper := (# db:wrapjava instance #) { - PDFTextStripper:new() - => PDFTextStripper:setStartPage($pageNo) - => PDFTextStripper:setEndPage($pageNo) - } - return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)} -}; - -(:~ summary info as map for $pdfpath :) -declare function pdfbox:report($pdfpath as xs:string) -as map(*){ - let $doc:=pdfbox:open($pdfpath) - return (map{ - "file": $pdfpath, - "pages": pdfbox:page-count($doc), - "outline": pdfbox:outline($doc)=>count() - },pdfbox:information($doc) -)=>map:merge() -}; - -(:~ java:bufferedImage for $pageNo using $scale times dpi= 72 -@param $pageNo (ZERO based) -@param $scale 1=72 dpi -@return Java java.awt.image.BufferedImage object -:) -declare function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float) -as item(){ - PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale) -}; - -(:~ save bufferedimage to $dest -@param $type = "gif","png" etc:) -declare function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string) -as xs:boolean{ - Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, File:new($dest)) -}; - -(:~ return image -@param $type = "gif","png" etc:) -declare function pdfbox:imageBinary($bufferedImage as item(),$type as xs:string) -as xs:base64Binary{ - let $bytes:=Q{java:java.io.ByteArrayOutputStream}new() - let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, $bytes) - return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes) - =>convert:integers-to-base64() -};pdfbox:versionfunction pdfbox:version ( ) as xs:string { Q{java:org.apache.pdfbox.util.Version}getVersion() }xs:stringfunction pdfbox:version() -as xs:string{ - Q{java:org.apache.pdfbox.util.Version}getVersion() -}pdfbox:openfunction pdfbox:open ( $pdfpath as xs:string ) as item() { Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath)) }pdfpathxs:stringitem()function pdfbox:open($pdfpath as xs:string) -as item(){ - Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath)) -}pdfbox:pdfVersionfunction pdfbox:pdfVersion ( $doc as item() ) as xs:float { PDDocument:getVersion($doc) }docitem()xs:floatfunction pdfbox:pdfVersion($doc as item()) -as xs:float{ - PDDocument:getVersion($doc) -}pdfbox:savefunction pdfbox:save ( $doc as item(),$savepath as xs:string ) as xs:string { PDDocument:save($doc,File:new($savepath)),$savepath }docitem()savepathxs:stringxs:stringfunction pdfbox:save($doc as item(),$savepath as xs:string) -as xs:string{ - PDDocument:save($doc,File:new($savepath)),$savepath -}pdfbox:closefunction pdfbox:close ( $doc as item() ) as empty-sequence() { (# db:wrapjava void #) { PDDocument:close($doc) } }docitem()empty-sequencefunction pdfbox:close($doc as item()) -as empty-sequence(){ - (# db:wrapjava void #) { - PDDocument:close($doc) - } -}pdfbox:page-countfunction pdfbox:page-count ( $doc as item() ) as xs:integer { PDDocument:getNumberOfPages($doc) }docitem()xs:integerfunction pdfbox:page-count($doc as item()) -as xs:integer{ - PDDocument:getNumberOfPages($doc) -}pdfbox:informationfunction pdfbox:information ( $doc as item() ) as map(*) { let $info:=PDDocument:getDocumentInformation($doc) return map{ "title": PDDocumentInformation:getTitle($info), "creator": PDDocumentInformation:getCreator($info), "producer": PDDocumentInformation:getProducer($info), "subject": PDDocumentInformation:getSubject($info), "keywords": PDDocumentInformation:getKeywords($info), "creationdate": pdfbox:gregToISO(PDDocumentInformation:getCreationDate($info)), "author": PDDocumentInformation:getAuthor($info) } }docitem()map(*)function pdfbox:information($doc as item()) -as map(*){ - let $info:=PDDocument:getDocumentInformation($doc) - return map{ - "title": PDDocumentInformation:getTitle($info), - "creator": PDDocumentInformation:getCreator($info), - "producer": PDDocumentInformation:getProducer($info), - "subject": PDDocumentInformation:getSubject($info), - "keywords": PDDocumentInformation:getKeywords($info), - "creationdate": pdfbox:gregToISO(PDDocumentInformation:getCreationDate($info)), - "author": PDDocumentInformation:getAuthor($info) - } -}pdfbox:gregToISOfunction pdfbox:gregToISO ( $item as item() ) as xs:string { Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string() }itemitem()xs:stringfunction pdfbox:gregToISO($item as item()) -as xs:string{ - Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string() -}pdfbox:outlinefunction pdfbox:outline ( $doc as item() ) as map(*)* { (# db:wrapjava some #) { let $outline:= PDDocument:getDocumentCatalog($doc) =>PDDocumentCatalog:getDocumentOutline() return if(exists($outline)) then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) } }docitem()map(*)function pdfbox:outline($doc as item()) -as map(*)*{ - (# db:wrapjava some #) { - let $outline:= - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getDocumentOutline() - - return if(exists($outline)) - then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) - } -}pdfbox:outlinefunction pdfbox:outline ( $doc as item(),$outlineItem as item()? ) as map(*)* { let $find as map(*):=pdfbox:_outline($doc ,$outlineItem) return map:get($find,"list") }docitem()outlineItemitem()map(*)function pdfbox:outline($doc as item(),$outlineItem as item()?) - -as map(*)*{ - let $find as map(*):=pdfbox:_outline($doc ,$outlineItem) - return map:get($find,"list") -}pdfbox:_outlinefunction pdfbox:_outline ( $doc as item(),$outlineItem as item()? ) as map(*) { hof:until( function($output) { empty($output?this) }, function($input ) { let $bk:= pdfbox:bookmark($input?this,$doc) let $bk:= if($bk?hasChildren) then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this)) return map:merge(($bk,map:entry("children",$kids))) else $bk return map{ "list": ($input?list, $bk), "this": PDOutlineItem:getNextSibling($input?this)} }, map{"list":(),"this":$outlineItem} ) }docitem()outlineItemitem()map(*)function pdfbox:_outline($doc as item(),$outlineItem as item()?) -as map(*){ - hof:until( - function($output) { empty($output?this) }, - function($input ) { - let $bk:= pdfbox:bookmark($input?this,$doc) - let $bk:= if($bk?hasChildren) - then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this)) - return map:merge(($bk,map:entry("children",$kids))) - else $bk - return map{ - "list": ($input?list, $bk), - "this": PDOutlineItem:getNextSibling($input?this)} - }, - map{"list":(),"this":$outlineItem} - ) -}pdfbox:outline-xmlfunction pdfbox:outline-xml ( $outline as map(*)* ) as element(outline) { element outline { $outline!pdfbox:bookmark-xml(.) } }outlinemap(*)element(outline)function pdfbox:outline-xml($outline as map(*)*) -as element(outline){ - element outline { - $outline!pdfbox:bookmark-xml(.) - } -}pdfbox:bookmark-xmlfunction pdfbox:bookmark-xml ( $outline as map(*)* ) as element(bookmark)* { $outline! <bookmark title="{?title}" index="{?index}"> {?children!pdfbox:bookmark-xml(.)} </bookmark> }outlinemap(*)element(bookmark)function pdfbox:bookmark-xml($outline as map(*)*) -as element(bookmark)* -{ - $outline! - <bookmark title="{?title}" index="{?index}"> - {?children!pdfbox:bookmark-xml(.)} - </bookmark> -}pdfbox:bookmarkfunction pdfbox:bookmark ( $bookmark as item(),$doc as item() ) as map(*) { map{ "index": PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc), "title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("�",""), "hasChildren": PDOutlineItem:hasChildren($bookmark) } }bookmarkitem()docitem()map(*)function pdfbox:bookmark($bookmark as item(),$doc as item()) -as map(*) -{ - map{ - "index": PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc), - "title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("�",""), - "hasChildren": PDOutlineItem:hasChildren($bookmark) - } -}pdfbox:outxfunction pdfbox:outx ( $page ,$document ) { let $currentPage := PDOutlineItem:findDestinationPage($page,$document) let $pageNumber := pdfbox:pageIndex($currentPage,$document) return $pageNumber }pagedocumentfunction pdfbox:outx($page ,$document) -{ - let $currentPage := PDOutlineItem:findDestinationPage($page,$document) - let $pageNumber := pdfbox:pageIndex($currentPage,$document) - return $pageNumber -}pdfbox:pageIndexfunction pdfbox:pageIndex ( $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :), $document ) as item()? { if(exists($page)) then PDDocument:getDocumentCatalog($document) =>PDDocumentCatalog:getPages() =>PDPageTree:indexOf($page) }pageitem()documentitem()function pdfbox:pageIndex( - $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :), - $document) -as item()? -{ - if(exists($page)) - then PDDocument:getDocumentCatalog($document) - =>PDDocumentCatalog:getPages() - =>PDPageTree:indexOf($page) -}pdfbox:extractfunction pdfbox:extract ( $doc as item(), $start as xs:integer,$end as xs:integer,$target as xs:string ) as xs:string { let $a:=PageExtractor:new($doc, $start, $end) =>PageExtractor:extract() return (pdfbox:save($a,$target),pdfbox:close($a)) }docitem()startxs:integerendxs:integertargetxs:stringxs:stringfunction pdfbox:extract($doc as item(), - $start as xs:integer,$end as xs:integer,$target as xs:string) -as xs:string -{ - let $a:=PageExtractor:new($doc, $start, $end) =>PageExtractor:extract() - return (pdfbox:save($a,$target),pdfbox:close($a)) -}pdfbox:getPageLabelsfunction pdfbox:getPageLabels ( $doc as item() ) as item() { PDDocument:getDocumentCatalog($doc) =>PDDocumentCatalog:getPageLabels() }docitem()item()function pdfbox:getPageLabels($doc as item()) -as item() -{ - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getPageLabels() -}pdfbox:pageLabelsfunction pdfbox:pageLabels ( $doc as item() ) as xs:string* { PDDocument:getDocumentCatalog($doc) =>PDDocumentCatalog:getPageLabels() =>PDPageLabels:getLabelsByPageIndices() }docitem()xs:stringfunction pdfbox:pageLabels($doc as item()) -as xs:string* -{ - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getPageLabels() - =>PDPageLabels:getLabelsByPageIndices() -}pdfbox:getTextfunction pdfbox:getText ( $doc as item(), $pageNo as xs:integer ) as xs:string { let $tStripper := (# db:wrapjava instance #) { PDFTextStripper:new() => PDFTextStripper:setStartPage($pageNo) => PDFTextStripper:setEndPage($pageNo) } return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)} }docitem()pageNoxs:integerxs:stringfunction pdfbox:getText($doc as item(), $pageNo as xs:integer) -as xs:string{ - let $tStripper := (# db:wrapjava instance #) { - PDFTextStripper:new() - => PDFTextStripper:setStartPage($pageNo) - => PDFTextStripper:setEndPage($pageNo) - } - return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)} -}pdfbox:reportfunction pdfbox:report ( $pdfpath as xs:string ) as map(*) { let $doc:=pdfbox:open($pdfpath) return (map{ "file": $pdfpath, "pages": pdfbox:page-count($doc), "outline": pdfbox:outline($doc)=>count() },pdfbox:information($doc) )=>map:merge() }pdfpathxs:stringmap(*)function pdfbox:report($pdfpath as xs:string) -as map(*){ - let $doc:=pdfbox:open($pdfpath) - return (map{ - "file": $pdfpath, - "pages": pdfbox:page-count($doc), - "outline": pdfbox:outline($doc)=>count() - },pdfbox:information($doc) -)=>map:merge() -}pdfbox:pageBufferedImagefunction pdfbox:pageBufferedImage ( $doc as item(), $pageNo as xs:integer,$scale as xs:float ) as item() { PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale) }docitem()pageNoxs:integerscalexs:floatitem()function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float) -as item(){ - PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale) -}pdfbox:imageSavefunction pdfbox:imageSave ( $bufferedImage as item(),$dest as xs:string,$type as xs:string ) as xs:boolean { Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, File:new($dest)) }bufferedImageitem()destxs:stringtypexs:stringxs:booleanfunction pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string) -as xs:boolean{ - Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, File:new($dest)) -}pdfbox:imageBinaryfunction pdfbox:imageBinary ( $bufferedImage as item(),$type as xs:string ) as xs:base64Binary { let $bytes:=Q{java:java.io.ByteArrayOutputStream}new() let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, $bytes) return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes) =>convert:integers-to-base64() }bufferedImageitem()typexs:stringxs:base64Binaryfunction pdfbox:imageBinary($bufferedImage as item(),$type as xs:string) -as xs:base64Binary{ - let $bytes:=Q{java:java.io.ByteArrayOutputStream}new() - let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, $bytes) - return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes) - =>convert:integers-to-base64() -} \ No newline at end of file diff --git a/docs/xqdoc/modules/F000001/xqparse.xml b/docs/xqdoc/modules/F000001/xqparse.xml deleted file mode 100644 index f6c768a..0000000 --- a/docs/xqdoc/modules/F000001/xqparse.xml +++ /dev/null @@ -1,271 +0,0 @@ -xquery version '3.1'; -(:~ -pdfbox 3.0 https://pdfbox.apache.org/ BaseX 10.7+ interface library, -requires pdfbox jar on classpath, tested with pdfbox-app-3.0.3.jar -@see download https://pdfbox.apache.org/download.cgi -@javadoc https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.3/ - -:) -module namespace pdfbox="org.expkg_zone58.Pdfbox3"; - -declare namespace Loader ="java:org.apache.pdfbox.Loader"; -declare namespace PDFTextStripper = "java:org.apache.pdfbox.text.PDFTextStripper"; - -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDDocument.html -:) -declare namespace PDDocument ="java:org.apache.pdfbox.pdmodel.PDDocument"; - -declare namespace PDDocumentCatalog ="java:org.apache.pdfbox.pdmodel.PDDocumentCatalog"; -declare namespace PDPageLabels ="java:org.apache.pdfbox.pdmodel.common.PDPageLabels"; - -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/multipdf/PageExtractor.html -:) -declare namespace PageExtractor ="java:org.apache.pdfbox.multipdf.PageExtractor"; - -(:~ - @see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/PDPageTree.html -:) -declare namespace PDPageTree ="java:org.apache.pdfbox.pdmodel.PDPageTree"; - -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.2/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDDocumentOutline.html -:) -declare namespace PDDocumentOutline ="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline"; - -declare namespace PDDocumentInformation ="java:org.apache.pdfbox.pdmodel.PDDocumentInformation"; -(:~ -@see https://javadoc.io/static/org.apache.pdfbox/pdfbox/3.0.0/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.html -:) -declare namespace PDOutlineItem="java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem"; -declare namespace PDFRenderer="java:org.apache.pdfbox.rendering.PDFRenderer"; -declare namespace RandomAccessReadBufferedFile = "java:org.apache.pdfbox.io.RandomAccessReadBufferedFile"; -declare namespace File ="java:java.io.File"; - -(:~ version of pdfbox:) -declare function pdfbox:version() -as xs:string{ - Q{java:org.apache.pdfbox.util.Version}getVersion() -}; - -(:~ open pdf, returns handle :) -declare function pdfbox:open($pdfpath as xs:string) -as item(){ - Loader:loadPDF( RandomAccessReadBufferedFile:new($pdfpath)) -}; - -(:~ the PDF specification version this document conforms to.:) -declare function pdfbox:pdfVersion($doc as item()) -as xs:float{ - PDDocument:getVersion($doc) -}; - -(:~ save pdf $doc to $savepath , returns $savepath :) -declare function pdfbox:save($doc as item(),$savepath as xs:string) -as xs:string{ - PDDocument:save($doc,File:new($savepath)),$savepath -}; - -declare function pdfbox:close($doc as item()) -as empty-sequence(){ - (# db:wrapjava void #) { - PDDocument:close($doc) - } -}; - -declare function pdfbox:page-count($doc as item()) -as xs:integer{ - PDDocument:getNumberOfPages($doc) -}; - -(:~ map with document metadata :) -declare function pdfbox:information($doc as item()) -as map(*){ - let $info:=PDDocument:getDocumentInformation($doc) - return map{ - "title": PDDocumentInformation:getTitle($info), - "creator": PDDocumentInformation:getCreator($info), - "producer": PDDocumentInformation:getProducer($info), - "subject": PDDocumentInformation:getSubject($info), - "keywords": PDDocumentInformation:getKeywords($info), - "creationdate": pdfbox:gregToISO(PDDocumentInformation:getCreationDate($info)), - "author": PDDocumentInformation:getAuthor($info) - } -}; - - (:~ convert date :) -declare -function pdfbox:gregToISO($item as item()) -as xs:string{ - Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string() -}; - -(:~ outline for $doc as map()* :) -declare function pdfbox:outline($doc as item()) -as map(*)*{ - (# db:wrapjava some #) { - let $outline:= - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getDocumentOutline() - - return if(exists($outline)) - then pdfbox:outline($doc,PDOutlineItem:getFirstChild($outline)) - } -}; - -(:~ return bookmark info for children of $outlineItem as seq of maps :) -declare function pdfbox:outline($doc as item(),$outlineItem as item()?) - -as map(*)*{ - let $find as map(*):=pdfbox:_outline($doc ,$outlineItem) - return map:get($find,"list") -}; - -(: BaseX bug 10.7? error if inlined in outline :) -declare function pdfbox:_outline($doc as item(),$outlineItem as item()?) -as map(*){ - hof:until( - function($output) { empty($output?this) }, - function($input ) { - let $bk:= pdfbox:bookmark($input?this,$doc) - let $bk:= if($bk?hasChildren) - then let $kids:=pdfbox:outline($doc,PDOutlineItem:getFirstChild($input?this)) - return map:merge(($bk,map:entry("children",$kids))) - else $bk - return map{ - "list": ($input?list, $bk), - "this": PDOutlineItem:getNextSibling($input?this)} - }, - map{"list":(),"this":$outlineItem} - ) -}; -(:~ outline as xml :) -declare function pdfbox:outline-xml($outline as map(*)*) -as element(outline){ - element outline { - $outline!pdfbox:bookmark-xml(.) - } -}; - -declare function pdfbox:bookmark-xml($outline as map(*)*) -as element(bookmark)* -{ - $outline! - <bookmark title="{?title}" index="{?index}"> - {?children!pdfbox:bookmark-xml(.)} - </bookmark> -}; - -(: return bookmark info for children of $outlineItem :) -declare function pdfbox:bookmark($bookmark as item(),$doc as item()) -as map(*) -{ - map{ - "index": PDOutlineItem:findDestinationPage($bookmark,$doc)=>pdfbox:pageIndex($doc), - "title": (# db:checkstrings #) {PDOutlineItem:getTitle($bookmark)}=>translate("�",""), - "hasChildren": PDOutlineItem:hasChildren($bookmark) - } -}; - -declare function pdfbox:outx($page ,$document) -{ - let $currentPage := PDOutlineItem:findDestinationPage($page,$document) - let $pageNumber := pdfbox:pageIndex($currentPage,$document) - return $pageNumber -}; - -(:~ pageIndex of $page in $document :) -declare function pdfbox:pageIndex( - $page as item()? (: as java:org.apache.pdfbox.pdmodel.PDPage :), - $document) -as item()? -{ - if(exists($page)) - then PDDocument:getDocumentCatalog($document) - =>PDDocumentCatalog:getPages() - =>PDPageTree:indexOf($page) -}; - - - -(:~ save new PDF doc from 1 based page range -@return save path :) -declare function pdfbox:extract($doc as item(), - $start as xs:integer,$end as xs:integer,$target as xs:string) -as xs:string -{ - let $a:=PageExtractor:new($doc, $start, $end) =>PageExtractor:extract() - return (pdfbox:save($a,$target),pdfbox:close($a)) -}; - - -(:~ pageLabel info -@see https://www.w3.org/TR/WCAG20-TECHS/PDF17.html#PDF17-examples -@see https://codereview.stackexchange.com/questions/286078/java-code-showing-page-labels-from-pdf-files -:) -declare function pdfbox:getPageLabels($doc as item()) -as item() -{ - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getPageLabels() -}; - -(:~ pageLabel for every page:) -declare function pdfbox:pageLabels($doc as item()) -as xs:string* -{ - PDDocument:getDocumentCatalog($doc) - =>PDDocumentCatalog:getPageLabels() - =>PDPageLabels:getLabelsByPageIndices() -}; - -(:~ return text on $pageNo :) -declare function pdfbox:getText($doc as item(), $pageNo as xs:integer) -as xs:string{ - let $tStripper := (# db:wrapjava instance #) { - PDFTextStripper:new() - => PDFTextStripper:setStartPage($pageNo) - => PDFTextStripper:setEndPage($pageNo) - } - return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$doc)} -}; - -(:~ summary info as map for $pdfpath :) -declare function pdfbox:report($pdfpath as xs:string) -as map(*){ - let $doc:=pdfbox:open($pdfpath) - return (map{ - "file": $pdfpath, - "pages": pdfbox:page-count($doc), - "outline": pdfbox:outline($doc)=>count() - },pdfbox:information($doc) -)=>map:merge() -}; - -(:~ java:bufferedImage for $pageNo using $scale times dpi= 72 -@param $pageNo (ZERO based) -@param $scale 1=72 dpi -@return Java java.awt.image.BufferedImage object -:) -declare function pdfbox:pageBufferedImage($doc as item(), $pageNo as xs:integer,$scale as xs:float) -as item(){ - PDFRenderer:new($doc)=>PDFRenderer:renderImage($pageNo,$scale) -}; - -(:~ save bufferedimage to $dest -@param $type = "gif","png" etc:) -declare function pdfbox:imageSave($bufferedImage as item(),$dest as xs:string,$type as xs:string) -as xs:boolean{ - Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, File:new($dest)) -}; - -(:~ return image -@param $type = "gif","png" etc:) -declare function pdfbox:imageBinary($bufferedImage as item(),$type as xs:string) -as xs:base64Binary{ - let $bytes:=Q{java:java.io.ByteArrayOutputStream}new() - let $_:=Q{java:javax.imageio.ImageIO}write($bufferedImage , $type, $bytes) - return Q{java:java.io.ByteArrayOutputStream}toByteArray($bytes) - =>convert:integers-to-base64() -}; \ No newline at end of file diff --git a/docs/xqdoc/resources/base.css b/docs/xqdoc/resources/base.css deleted file mode 100644 index ff84327..0000000 --- a/docs/xqdoc/resources/base.css +++ /dev/null @@ -1,1153 +0,0 @@ -/****************************************************************************** - * Style sheet for the W3C specifications * - * - * Special classes handled by this style sheet include: - * - * Indices - * - .toc for the Table of Contents (
                                                  ) - * + for the section numbers - * - #toc for the Table of Contents (