From 0ae74baba304c4ee7a4063d95c78e9814ed0cd1f Mon Sep 17 00:00:00 2001 From: Andy Bunce Date: Wed, 4 Jun 2025 11:59:13 +0100 Subject: [PATCH] [fix] tests --- docs/xqdoc/annotations.html | 4 +- docs/xqdoc/imports.html | 2 +- docs/xqdoc/index.html | 8 ++-- docs/xqdoc/modules/F000001/index.html | 65 +++++++++++++------------- docs/xqdoc/modules/F000001/xqdoc.xml | 62 ++++++++++++------------ docs/xqdoc/modules/F000001/xqparse.xml | 30 ++++++------ docs/xqdoc/restxq.html | 2 +- docs/xqdoc/validation-report.xml | 2 +- docs/xqdoc/xqdoca.xml | 2 +- readme.md | 32 ++++++------- src/Pdfbox3.xqm | 38 ++++++++------- tests/test.xqm | 18 +++---- 12 files changed, 133 insertions(+), 132 deletions(-) diff --git a/docs/xqdoc/annotations.html b/docs/xqdoc/annotations.html index 1830185..f45c54b 100644 --- a/docs/xqdoc/annotations.html +++ b/docs/xqdoc/annotations.html @@ -6,6 +6,6 @@ / Annotations importsimports-diagimports-diag.mmdreportrestxqxqdoc-validatexqdoca.xml

Contents -

  1. Summary
  2. Annotations
    1. 2.1 http://www.w3.org/2012/xquery

Summary

This project uses 1 annotation namespaces.

Related documents
ViewDescriptionFormat
reportIndex of sourcesxhtml
restxqSummary of REST interfacexhtml
importsSummary of import usagexhtml
imports-diagProject wide module imports as html mermaid class diagramhtml5
imports-diag.mmdProject wide module imports as a mermaid class diagramtext
xqdoca.xmlxqDocA run configuration report (XML)xml
xqdoc-validatevalidate generated xqdoc filesxml

Annotations

2.1 http://www.w3.org/2012/xquery

private
\ No newline at end of file +   on Wednesday, 4th June 2025

\ No newline at end of file diff --git a/docs/xqdoc/imports.html b/docs/xqdoc/imports.html index f8924ef..f602dd3 100644 --- a/docs/xqdoc/imports.html +++ b/docs/xqdoc/imports.html @@ -6,4 +6,4 @@ Contents
  1. Summary
  2. Imports

    Summary

    Lists all modules imported.

    Related documents
    ViewDescriptionFormat
    reportIndex of sourcesxhtml
    restxqSummary of REST interfacexhtml
    imports-diagProject wide module imports as html mermaid class diagramhtml5
    imports-diag.mmdProject wide module imports as a mermaid class diagramtext
    annotationsSummary of XQuery annotation usexhtml
    xqdoca.xmlxqDocA run configuration report (XML)xml
    xqdoc-validatevalidate generated xqdoc filesxml

    Imports (0)

    \ No newline at end of file +   on Wednesday, 4th June 2025

    \ No newline at end of file diff --git a/docs/xqdoc/index.html b/docs/xqdoc/index.html index f2a36ba..09f2ec5 100644 --- a/docs/xqdoc/index.html +++ b/docs/xqdoc/index.html @@ -6,9 +6,9 @@ 1 XQuery source files, and uses 1 annotation namespaces.

    This document was built from source folder C:/Users/mrwhe/git/expkg-zone58/pdfbox/src/ on - Tuesday, 3rd June 2025.

    Related documents
    ViewDescriptionFormat
    reportIndex of sourcesxhtml
    restxqSummary of REST interfacexhtml
    importsSummary of import usagexhtml
    imports-diagProject wide module imports as html mermaid class diagramhtml5
    imports-diag.mmdProject wide module imports as a mermaid class diagramtext
    annotationsSummary of XQuery annotation usexhtml
    xqdoca.xmlxqDocA run configuration report (XML)xml
    xqdoc-validatevalidate generated xqdoc filesxml

    XQuery Main (0)

    None

    XQuery Library (1)

    UriPrefixDescriptionUseAMetrics
    org.expkg_zone58.Pdfbox3pdfbox + Wednesday, 4th June 2025.

    Related documents
    ViewDescriptionFormat
    reportIndex of sourcesxhtml
    restxqSummary of REST interfacexhtml
    importsSummary of import usagexhtml
    imports-diagProject wide module imports as html mermaid class diagramhtml5
    imports-diag.mmdProject wide module imports as a mermaid class diagramtext
    annotationsSummary of XQuery annotation usexhtml
    xqdoca.xmlxqDocA run configuration report (XML)xml
    xqdoc-validatevalidate generated xqdoc filesxml

    XQuery Main (0)

    None

    XQuery Library (1)

    UriPrefixDescriptionUseAMetrics
    org.expkg_zone58.Pdfbox3pdfbox -A BaseX 10.7+ interface to pdfbox3 https://...
    0
    Library
    ↖0
    P
    V#1
    F#37

    File view (1)

    Annotation namespaces (1)

    A total of 7 annotations are defined. -

    http://www.w3.org/2012/xquery

    0
    Library
    ↖0
    P
    V#1
    F#37

    File view (1)

    Annotation namespaces (1)

    A total of 8 annotations are defined. +

    http://www.w3.org/2012/xquery

    private8
    \ No newline at end of file +   on Wednesday, 4th June 2025

    \ No newline at end of file diff --git a/docs/xqdoc/modules/F000001/index.html b/docs/xqdoc/modules/F000001/index.html index 0ed301c..2be907e 100644 --- a/docs/xqdoc/modules/F000001/index.html +++ b/docs/xqdoc/modules/F000001/index.html @@ -1,7 +1,7 @@ src - xqDocA - xqDocA

    org.expkg_zone58.Pdfbox3  library module
    P

    Summary

    +

    org.expkg_zone58.Pdfbox3

    1. 1 Summary
    2. 2 Imports
    3. 3 Variables
      1. 3.1$pdfbox:property-map
        P
    4. 4 Functions
      1. 4.1binary
      2. 4.2bookmark
        P
      3. 4.3bookmark-xml
        P
      4. 4.4close
      5. 4.5do-until
        P
      6. 4.6extract-range
      7. 4.7find-page
      8. 4.8gregToISO
        P
      9. 4.9label-as-map
      10. 4.10label-as-string
      11. 4.11labels-as-map
      12. 4.12labels-as-strings
      13. 4.13labels-by-page
      14. 4.14metadata
      15. 4.15number-of-bookmarks
      16. 4.16number-of-labels
      17. 4.17number-of-pages
      18. 4.18open
      19. 4.19outline
        P
      20. 4.20outline-xml
      21. 4.21outline_
        P
      22. 4.22page-labels
      23. 4.23page-media-box
      24. 4.24page-render
      25. 4.25page-text
      26. 4.26pdf-save
      27. 4.27property
      28. 4.28property-names
      29. 4.29read-stream
        P
      30. 4.30report
      31. 4.31report-save
      32. 4.32specification
      33. 4.33version
      34. 4.34with-pdf
    5. 5 Namespaces
    6. 6 RestXQ
    7. 7 Source

    Summary

    A BaseX 10.7+ interface to pdfbox3 https://pdfbox.apache.org/ , requires pdfbox jars on classpath, in lib/custom or xar @@ -13,9 +13,9 @@ refer to the same concept. Also label and (page)range are used interchangably&#x 0 modules. It imports 0 modules.

    Variables

    3.1 $pdfbox:property-map

    Summary
    -property access map -keys are property names, -values are sequences of functions to get property from $pdf object +Defines a map from property names to evaluation method. +Keys are property names, +values are sequences of functions to get property value starting from a $pdf object.
    Type
    References 15 functions from 3 modules
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getAuthor#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getCreationDate#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getCreator#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getKeywords#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getModificationDate#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getProducer#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getSubject#1
    • {java:org.apache.pdfbox.pdmodel.PDDocumentInformation}getTitle#1
    • {java:org.apache.pdfbox.pdmodel.PDDocument}getDocumentInformation#1
    • pdfbox:gregToISO#1
    • pdfbox:labels-as-strings#1
    • pdfbox:number-of-bookmarks#1
    • pdfbox:number-of-labels#1
    • pdfbox:number-of-pages#1
    • pdfbox:specification#1
    Annotations (1)
    %private()
    Source ( 36 lines)
    variable $pdfbox:property-map:=map{
       "#pages": pdfbox:number-of-pages#1,
     
    @@ -141,7 +141,7 @@ as xs:string?{
      then Q{java:java.util.GregorianCalendar}toZonedDateTime($item)=>string()
      else ()
     }

    4.9 pdfbox:label-as-map

    Arities: #2

    Summary
    -express label/page-range for $page as map
    Signatures
    pdfbox:label-as-map +label/page-range for $page as map
    Signatures
    pdfbox:label-as-map ( $pagelabels, $page as xs:integer ) as map(*)
    Parameters
    • pagelabels as 
    • page as xs:integer
    Return
    • map(*)
    Referenced by 1 functions from 1 modules
    References 5 functions from 3 modules
    • {http://www.w3.org/2005/xpath-functions}empty#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabelRange}getPrefix#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabelRange}getStart#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabelRange}getStyle#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabels}getPageLabelRange#2
    Source ( 13 lines)
    function pdfbox:label-as-map($pagelabels,$page as  xs:integer)
     as map(*)
    @@ -156,7 +156,7 @@ as map(*)
           "style":  PDPageLabelRange:getStyle($label)
           }
     }

    4.10 pdfbox:label-as-string

    Arities: #2

    Summary
    -label for $page formated as string
    Signatures
    pdfbox:label-as-string +label for $page formated as string, empty if none
    Signatures
    pdfbox:label-as-string ( $pagelabels, $page as xs:integer ) as xs:string?
    Parameters
    • pagelabels as 
    • page as xs:integer
    Return
    • xs:string ?
    Referenced by 1 functions from 1 modules
    References 7 functions from 3 modules
    • {http://www.w3.org/2005/xpath-functions}empty#1
    • {http://www.w3.org/2005/xpath-functions}exists#1
    • {http://www.w3.org/2005/xpath-functions}string-join#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabelRange}getPrefix#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabelRange}getStart#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabelRange}getStyle#1
    • {java:org.apache.pdfbox.pdmodel.common.PDPageLabels}getPageLabelRange#2
    Source ( 15 lines)
    function pdfbox:label-as-string($pagelabels,$page as  xs:integer)
     as xs:string?{
    @@ -173,7 +173,7 @@ as xs:string?{
                                     if(exists($prefix)) then '*' || $prefix  (:TODO double " :)
                         ))
     }

    4.11 pdfbox:labels-as-map

    Arities: #1

    Summary
    -sequence of maps for each label in
    Signatures
    pdfbox:labels-as-map +sequence of maps for each label/page range defined in $pdf
    Signatures
    pdfbox:labels-as-map ( $pdf as item() ) as map(*)*
    Parameters
    • pdf as item()
    Return
    • map(*) *
    Referenced by 0 functions from 0 modules
      References 3 functions from 2 modules
      Source ( 8 lines)
      function pdfbox:labels-as-map($pdf as item())
       as map(*)*{
      @@ -226,7 +226,7 @@ as document-node(element(*))?
                            )?data=>parse-xml()
                 else ()
       }

      4.15 pdfbox:number-of-bookmarks

      Arities: #1

      Summary
      -number of outline items
      Signatures
      pdfbox:number-of-bookmarks +The number of outline items defined in $pdf
      Signatures
      pdfbox:number-of-bookmarks ( $pdf as item() ) as xs:integer
      Parameters
      • pdf as item()
      Return
      • xs:integer
      Referenced by 0 functions from 0 modules
        References 2 functions from 2 modules
        Source ( 5 lines)
        function pdfbox:number-of-bookmarks($pdf as item())
         as xs:integer{
        @@ -274,12 +274,12 @@ as item(){
                       else $pdfsrc
             return error(xs:QName("pdfbox:open"),"Failed PDF load " || $loc || " " || $err:description)
         }
        -}

        4.19 pdfbox:outline

        Arities: #1#2

        Summary
        -outline for $pdf as map()*
        Signatures
        pdfbox:outline +}

        4.19 pdfbox:outline

        Arities: #1#2P

        Summary
        +Return outline for $pdf as map()*
        Signatures
        pdfbox:outline ( $pdf as item() ) as map(*)*
        pdfbox:outline ( - $pdf as item(), $outlineItem as item()? ) as map(*)*
        Parameters
        • pdf as item()
        • outlineItem as item()?
        Return
        • map(*) *
        Referenced by 3 functions from 1 modules
        References 6 functions from 5 modules
        • {http://www.w3.org/2005/xpath-functions/map}get#2
        • {http://www.w3.org/2005/xpath-functions}exists#1
        • {java:org.apache.pdfbox.pdmodel.PDDocument}getDocumentCatalog#1
        • {java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem}getFirstChild#1
        • pdfbox:outline#2
        • pdfbox:outline_#2
        Source ( 16 lines)
        function pdfbox:outline($pdf as item())
        +			$pdf as item(), $outlineItem as item()? ) as map(*)*
        Parameters
        • pdf as item()
        • outlineItem as item()?
        Return
        • map(*) *
        Referenced by 3 functions from 1 modules
        References 6 functions from 5 modules
        • {http://www.w3.org/2005/xpath-functions/map}get#2
        • {http://www.w3.org/2005/xpath-functions}exists#1
        • {java:org.apache.pdfbox.pdmodel.PDDocument}getDocumentCatalog#1
        • {java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem}getFirstChild#1
        • pdfbox:outline#2
        • pdfbox:outline_#2
        Annotations (1)
        %private()
        Source ( 16 lines)
        function pdfbox:outline($pdf as item())
         as map(*)*{
           (# db:wrapjava some #) {
           let $outline:=
        @@ -303,7 +303,7 @@ as element(outline)?{
                  then <outline>{$outline!pdfbox:bookmark-xml(.)}</outline>
                  else ()
         }

        4.21 pdfbox:outline_

        Arities: #2P

        Summary
        -BaseX bug 10.7? error if inlined in outline
        Signatures
        pdfbox:outline_ +outline helper. BaseX bug 10.7? error if inlined in outline
        Signatures
        pdfbox:outline_ ( $pdf as item(), $outlineItem as item()? ) as map(*)
        Parameters
        • pdf as item()
        • outlineItem as item()?
        Return
        • map(*)
        Referenced by 1 functions from 1 modules
        References 8 functions from 4 modules
        • {http://www.w3.org/2005/xpath-functions/map}entry#2
        • {http://www.w3.org/2005/xpath-functions/map}merge#1
        • {http://www.w3.org/2005/xpath-functions}empty#1
        • {java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem}getFirstChild#1
        • {java:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem}getNextSibling#1
        • pdfbox:bookmark#2
        • pdfbox:do-until#3
        • pdfbox:outline#2
        Annotations (1)
        %private()
        Source ( 20 lines)
        function pdfbox:outline_($pdf as item(),$outlineItem as item()?)
         as map(*){
        @@ -335,8 +335,7 @@ get pagelabels exist
        Signatures
        Signatures
        pdfbox:page-media-box ( - $pdf as item(), $pageNo as xs:integer ) as xs:string
        Parameters
        • pdf as item()
        • pageNo as xs:integer
        Return
        • xs:string
        Tags
        • @result: - e.g. [0.0,0.0,168.0,239.52]
        Referenced by 0 functions from 0 modules
          References 1 functions from 1 modules
          • {java:org.apache.pdfbox.pdmodel.PDDocument}getPage#2
          Source ( 6 lines)
          function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer)
          +			$pdf as item(), $pageNo as xs:integer ) as xs:string
          Parameters
          • pdf as item()
          • pageNo as xs:integer
          Return
          • xs:string e.g. [0.0,0.0,168.0,239.52]
          Referenced by 0 functions from 0 modules
            References 1 functions from 1 modules
            • {java:org.apache.pdfbox.pdmodel.PDDocument}getPage#2
            Source ( 6 lines)
            function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer)
             as xs:string{
               PDDocument:getPage($pdf, $pageNo)
               =>PDPage:getMediaBox()
            @@ -373,7 +372,7 @@ Save pdf $pdf to filesystem at $savepath , returns $savepath

            4.27 pdfbox:property

            Arities: #2

            Summary
            -return value of $property for $pdf
            Signatures
            pdfbox:property +Return the value of $property for $pdf
            Signatures
            pdfbox:property ( $pdf as item(), $property as xs:string ) as item()*
            Parameters
            • pdf as item()
            • property as xs:string
            Return
            • item() *
            Referenced by 1 functions from 1 modules
            References 5 functions from 2 modules
            • {http://www.w3.org/2001/XMLSchema}QName#1
            • {http://www.w3.org/2005/xpath-functions}concat#3
            • {http://www.w3.org/2005/xpath-functions}error#2
            • {http://www.w3.org/2005/xpath-functions}exists#1
            • {http://www.w3.org/2005/xpath-functions}fold-left#3
            Source ( 9 lines)
            function pdfbox:property($pdf as item(),$property as xs:string)
             as item()*{
            @@ -384,7 +383,7 @@ as item()*{
                                     function($result,$this as function(*)){$result!$this(.)})
                      else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined."))
             }

            4.28 pdfbox:property-names

            Arities: #0

            Summary
            -known property names sorted
            Signatures
            pdfbox:property-names +Defined property names, sorted
            Signatures
            pdfbox:property-names ( ) as xs:string*
            Return
            • xs:string *
            Referenced by 1 functions from 1 modules
            Source ( 4 lines)
            function pdfbox:property-names() 
             as xs:string*{
            @@ -432,7 +431,7 @@ as map(*){
                            
               }
             }

            4.31 pdfbox:report-save

            Arities: #2

            Summary
            -convenience function to save report() data to file
            Signatures
            pdfbox:report-save +Convenience function to save report() data to file
            Signatures
            pdfbox:report-save ( $data as map(*), $dest as xs:string ) as empty-sequence
            Parameters
            • data as map(*)
            • dest as xs:string
            Return
            • empty-sequence
            Referenced by 0 functions from 0 modules
              References 2 functions from 2 modules
              • {http://basex.org/modules/csv}serialize#2
              • {http://expath.org/ns/file}write-text#2
              Source ( 5 lines)
              function pdfbox:report-save($data as map(*),$dest as xs:string)
               as empty-sequence(){
              @@ -609,9 +608,9 @@ as xs:base64Binary{
               };
               
               
              -(:~ property access map
              -   keys are property names, 
              -   values are sequences of functions to get property from $pdf object
              +(:~ Defines a map from property names to evaluation method.
              +   Keys are property names, 
              +   values are sequences of functions to get property value starting from a $pdf object.
               :)
               declare %private variable $pdfbox:property-map:=map{
                 "#pages": pdfbox:number-of-pages#1,
              @@ -650,13 +649,13 @@ declare %private variable $pdfbox:property-map:=map{
                  "labels":      pdfbox:labels-as-strings#1                     
               };
               
              -(:~ known property names sorted :)
              +(:~ Defined property names, sorted :)
               declare function pdfbox:property-names() 
               as xs:string*{
                 $pdfbox:property-map=>map:keys()=>sort()
               };
               
              -(:~  return value of $property for $pdf :)
              +(:~  Return the value of $property for $pdf :)
               declare function pdfbox:property($pdf as item(),$property as xs:string)
               as item()*{
                 let $fns:= $pdfbox:property-map($property)
              @@ -674,7 +673,7 @@ as map(*){
                pdfbox:report($pdfpaths,pdfbox:property-names())
               };
               
              -(:~ summary CSV style info for named properties for $pdfpaths 
              +(:~ summary CSV style info for named $properties for PDFs in $pdfpaths 
               @see https://docs.basex.org/main/CSV_Functions#xquery
               :)
               declare function pdfbox:report($pdfpaths as item()*, $properties as xs:string*)
              @@ -702,14 +701,14 @@ as map(*){
                 }
               };
               
              -(:~ convenience function to save report() data to file :)
              +(:~ Convenience function to save report() data to file :)
               declare function pdfbox:report-save($data as map(*),$dest as xs:string)
               as empty-sequence(){
                 let $opts := map {  "format":"xquery", "header":"yes", "separator" : "," }
                 return file:write-text($dest,csv:serialize($data,$opts))
               };
               
              -(:~ number of outline items :)
              +(:~ The number of outline items defined in $pdf :)
               declare function pdfbox:number-of-bookmarks($pdf as item())
               as xs:integer{
                 let $xml:=pdfbox:outline-xml($pdf)
              @@ -747,7 +746,7 @@ as map(*){
                 return map{"n":$n, "data": $read || $data}
               };
               
              -(:~ outline for $pdf as map()* :)
              +(:~ Return outline for $pdf as map()* :)
               declare function pdfbox:outline($pdf as item())
               as map(*)*{
                 (# db:wrapjava some #) {
              @@ -761,13 +760,13 @@ as map(*)*{
               };
               
               (:~ return bookmark info for children of $outlineItem as seq of maps :)
              -declare function pdfbox:outline($pdf as item(),$outlineItem as item()?)
              +declare %private function pdfbox:outline($pdf as item(),$outlineItem as item()?)
               as map(*)*{
                 let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem)
                 return map:get($find,"list")
               };
               
              -(:~ BaseX bug 10.7? error if inlined in outline :)
              +(:~ outline helper. BaseX bug 10.7? error if inlined in outline :)
               declare %private function pdfbox:outline_($pdf as item(),$outlineItem as item()?)
               as map(*){
                 pdfbox:do-until(
              @@ -889,7 +888,7 @@ declare function pdfbox:page-labels($pdf)
                 =>PDDocumentCatalog:getPageLabels()
               };
               
              -(:~ label for $page formated as string :)
              +(:~ label for $page formated as string, empty if none :)
               declare function pdfbox:label-as-string($pagelabels,$page as  xs:integer)
               as xs:string?{
                 let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page)
              @@ -906,7 +905,7 @@ as xs:string?{
                                   ))
               };
               
              -(:~ sequence of maps for each label in :)
              +(:~ sequence of maps for each label/page range defined in $pdf:)
               declare function pdfbox:labels-as-map($pdf as item())
               as map(*)*{
                 let $pagelabels:=PDDocument:getDocumentCatalog($pdf)
              @@ -916,7 +915,7 @@ as map(*)*{
                         !pdfbox:label-as-map($pagelabels,.)
               };
               
              -(:~ express label/page-range for $page as map :)
              +(:~ label/page-range for $page as map :)
               declare function pdfbox:label-as-map($pagelabels,$page as  xs:integer)
               as map(*)
               {
              @@ -945,7 +944,7 @@ as xs:string{
               };
               
               (:~ Return size of $pageNo (zero based)
              -@result e.g. [0.0,0.0,168.0,239.52]
              +@return e.g. [0.0,0.0,168.0,239.52]
                :)
               declare function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer)
               as xs:string{
              @@ -989,4 +988,4 @@ declare %private function pdfbox:do-until(
               };
               
              \ No newline at end of file +   on Wednesday, 4th June 2025

              \ No newline at end of file diff --git a/docs/xqdoc/modules/F000001/xqdoc.xml b/docs/xqdoc/modules/F000001/xqdoc.xml index d6ff83d..ecff44c 100644 --- a/docs/xqdoc/modules/F000001/xqdoc.xml +++ b/docs/xqdoc/modules/F000001/xqdoc.xml @@ -1,4 +1,4 @@ -2025-06-03T22:34:04.782+01:001.1org.expkg_zone58.Pdfbox3pdfbox +2025-06-04T10:09:22.636+01:001.1org.expkg_zone58.Pdfbox3pdfbox A BaseX 10.7+ interface to pdfbox3 https://pdfbox.apache.org/ , requires pdfbox jars on classpath, in lib/custom or xar @@ -144,9 +144,9 @@ as xs:base64Binary{ }; -(:~ property access map - keys are property names, - values are sequences of functions to get property from $pdf object +(:~ Defines a map from property names to evaluation method. + Keys are property names, + values are sequences of functions to get property value starting from a $pdf object. :) declare %private variable $pdfbox:property-map:=map{ "#pages": pdfbox:number-of-pages#1, @@ -185,13 +185,13 @@ declare %private variable $pdfbox:property-map:=map{ "labels": pdfbox:labels-as-strings#1 }; -(:~ known property names sorted :) +(:~ Defined property names, sorted :) declare function pdfbox:property-names() as xs:string*{ $pdfbox:property-map=>map:keys()=>sort() }; -(:~ return value of $property for $pdf :) +(:~ Return the value of $property for $pdf :) declare function pdfbox:property($pdf as item(),$property as xs:string) as item()*{ let $fns:= $pdfbox:property-map($property) @@ -209,7 +209,7 @@ as map(*){ pdfbox:report($pdfpaths,pdfbox:property-names()) }; -(:~ summary CSV style info for named properties for $pdfpaths +(:~ summary CSV style info for named $properties for PDFs in $pdfpaths @see https://docs.basex.org/main/CSV_Functions#xquery :) declare function pdfbox:report($pdfpaths as item()*, $properties as xs:string*) @@ -237,14 +237,14 @@ as map(*){ } }; -(:~ convenience function to save report() data to file :) +(:~ Convenience function to save report() data to file :) declare function pdfbox:report-save($data as map(*),$dest as xs:string) as empty-sequence(){ let $opts := map { "format":"xquery", "header":"yes", "separator" : "," } return file:write-text($dest,csv:serialize($data,$opts)) }; -(:~ number of outline items :) +(:~ The number of outline items defined in $pdf :) declare function pdfbox:number-of-bookmarks($pdf as item()) as xs:integer{ let $xml:=pdfbox:outline-xml($pdf) @@ -282,7 +282,7 @@ as map(*){ return map{"n":$n, "data": $read || $data} }; -(:~ outline for $pdf as map()* :) +(:~ Return outline for $pdf as map()* :) declare function pdfbox:outline($pdf as item()) as map(*)*{ (# db:wrapjava some #) { @@ -296,13 +296,13 @@ as map(*)*{ }; (:~ return bookmark info for children of $outlineItem as seq of maps :) -declare function pdfbox:outline($pdf as item(),$outlineItem as item()?) +declare %private function pdfbox:outline($pdf as item(),$outlineItem as item()?) as map(*)*{ let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") }; -(:~ BaseX bug 10.7? error if inlined in outline :) +(:~ outline helper. BaseX bug 10.7? error if inlined in outline :) declare %private function pdfbox:outline_($pdf as item(),$outlineItem as item()?) as map(*){ pdfbox:do-until( @@ -424,7 +424,7 @@ declare function pdfbox:page-labels($pdf) =>PDDocumentCatalog:getPageLabels() }; -(:~ label for $page formated as string :) +(:~ label for $page formated as string, empty if none :) declare function pdfbox:label-as-string($pagelabels,$page as xs:integer) as xs:string?{ let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) @@ -441,7 +441,7 @@ as xs:string?{ )) }; -(:~ sequence of maps for each label in :) +(:~ sequence of maps for each label/page range defined in $pdf:) declare function pdfbox:labels-as-map($pdf as item()) as map(*)*{ let $pagelabels:=PDDocument:getDocumentCatalog($pdf) @@ -451,7 +451,7 @@ as map(*)*{ !pdfbox:label-as-map($pagelabels,.) }; -(:~ express label/page-range for $page as map :) +(:~ label/page-range for $page as map :) declare function pdfbox:label-as-map($pagelabels,$page as xs:integer) as map(*) { @@ -480,7 +480,7 @@ as xs:string{ }; (:~ Return size of $pageNo (zero based) -@result e.g. [0.0,0.0,168.0,239.52] +@return e.g. [0.0,0.0,168.0,239.52] :) declare function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer) as xs:string{ @@ -523,9 +523,9 @@ declare %private function pdfbox:do-until( }; pdfbox:property-map -property access map -keys are property names, -values are sequences of functions to get property from $pdf object +Defines a map from property names to evaluation method. +Keys are property names, +values are sequences of functions to get property value starting from a $pdf object. org.expkg_zone58.Pdfbox3number-of-pagesorg.expkg_zone58.Pdfbox3number-of-bookmarksorg.expkg_zone58.Pdfbox3number-of-labelsorg.expkg_zone58.Pdfbox3specificationjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetTitlejava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetAuthorjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetCreatorjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetProducerjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetSubjectjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetKeywordsjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetCreationDateorg.expkg_zone58.Pdfbox3gregToISOjava:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentInformationjava:org.apache.pdfbox.pdmodel.PDDocumentInformationgetModificationDateorg.expkg_zone58.Pdfbox3gregToISOorg.expkg_zone58.Pdfbox3labels-as-stringsvariable $pdfbox:property-map:=map{ "#pages": pdfbox:number-of-pages#1, @@ -637,11 +637,11 @@ as xs:base64Binary{ =>convert:integers-to-base64() } -known property names sortedpdfbox:property-namesfunction pdfbox:property-names ( ) as xs:string* { $pdfbox:property-map=>map:keys()=>sort() }xs:stringorg.expkg_zone58.Pdfbox3property-mapfunction pdfbox:property-names() +Defined property names, sortedpdfbox:property-namesfunction pdfbox:property-names ( ) as xs:string* { $pdfbox:property-map=>map:keys()=>sort() }xs:stringorg.expkg_zone58.Pdfbox3property-mapfunction pdfbox:property-names() as xs:string*{ $pdfbox:property-map=>map:keys()=>sort() } -return value of $property for $pdfpdfbox:propertyfunction pdfbox:property ( $pdf as item(),$property as xs:string ) as item()* { let $fns:= $pdfbox:property-map($property) return if(exists($fns)) then fold-left($fns, $pdf, function($result,$this as function(*)){$result!$this(.)}) else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined.")) }pdfitem()propertyxs:stringitem()http://www.w3.org/2005/xpath-functionsexistshttp://www.w3.org/2005/xpath-functionsfold-lefthttp://www.w3.org/2005/xpath-functionserrorhttp://www.w3.org/2001/XMLSchemaQNamehttp://www.w3.org/2005/xpath-functionsconcatorg.expkg_zone58.Pdfbox3property-mapfunction pdfbox:property($pdf as item(),$property as xs:string) +Return the value of $property for $pdfpdfbox:propertyfunction pdfbox:property ( $pdf as item(),$property as xs:string ) as item()* { let $fns:= $pdfbox:property-map($property) return if(exists($fns)) then fold-left($fns, $pdf, function($result,$this as function(*)){$result!$this(.)}) else error(xs:QName('pdfbox:property'),concat("Property '",$property,"' not defined.")) }pdfitem()propertyxs:stringitem()http://www.w3.org/2005/xpath-functionsexistshttp://www.w3.org/2005/xpath-functionsfold-lefthttp://www.w3.org/2005/xpath-functionserrorhttp://www.w3.org/2001/XMLSchemaQNamehttp://www.w3.org/2005/xpath-functionsconcatorg.expkg_zone58.Pdfbox3property-mapfunction pdfbox:property($pdf as item(),$property as xs:string) as item()*{ let $fns:= $pdfbox:property-map($property) return if(exists($fns)) @@ -655,7 +655,7 @@ summary CSV style info for all properties for $pdfpaths as map(*){ pdfbox:report($pdfpaths,pdfbox:property-names()) } -summary CSV style info for named properties for $pdfpaths +summary CSV style info for named $properties for PDFs in $pdfpaths https://docs.basex.org/main/CSV_Functions#xquerypdfbox:reportfunction pdfbox:report ( $pdfpaths as item()*, $properties as xs:string* ) as map(*) { map{"names": array{"path",$properties}, "records": for $path in $pdfpaths let $name:=if($path instance of xs:base64Binary) then "binary" else $path return try{ let $pdf:=pdfbox:open($path) return (fold-left($properties, array{$name}, function($result as array(*),$prop as xs:string){ array:append($result, string(pdfbox:property($pdf, $prop)))} ), pdfbox:close($pdf) ) } catch *{ fold-left($properties, array{$name}, function($result as array(*),$prop as xs:string){ array:append($result, "#ERROR")} ) } } }pdfpathsitem()propertiesxs:stringmap(*)org.expkg_zone58.Pdfbox3openhttp://www.w3.org/2005/xpath-functionsfold-lefthttp://www.w3.org/2005/xpath-functions/arrayappendhttp://www.w3.org/2005/xpath-functionsstringorg.expkg_zone58.Pdfbox3propertyorg.expkg_zone58.Pdfbox3closehttp://www.w3.org/2005/xpath-functionsfold-lefthttp://www.w3.org/2005/xpath-functions/arrayappendfunction pdfbox:report($pdfpaths as item()*, $properties as xs:string*) as map(*){ map{"names": array{"path",$properties}, @@ -680,12 +680,12 @@ as map(*){ } } -convenience function to save report() data to filepdfbox:report-savefunction pdfbox:report-save ( $data as map(*),$dest as xs:string ) as empty-sequence() { let $opts := map { "format":"xquery", "header":"yes", "separator" : "," } return file:write-text($dest,csv:serialize($data,$opts)) }datamap(*)destxs:stringempty-sequencehttp://expath.org/ns/filewrite-texthttp://basex.org/modules/csvserializefunction pdfbox:report-save($data as map(*),$dest as xs:string) +Convenience function to save report() data to filepdfbox:report-savefunction pdfbox:report-save ( $data as map(*),$dest as xs:string ) as empty-sequence() { let $opts := map { "format":"xquery", "header":"yes", "separator" : "," } return file:write-text($dest,csv:serialize($data,$opts)) }datamap(*)destxs:stringempty-sequencehttp://expath.org/ns/filewrite-texthttp://basex.org/modules/csvserializefunction pdfbox:report-save($data as map(*),$dest as xs:string) as empty-sequence(){ let $opts := map { "format":"xquery", "header":"yes", "separator" : "," } return file:write-text($dest,csv:serialize($data,$opts)) } -number of outline itemspdfbox:number-of-bookmarksfunction pdfbox:number-of-bookmarks ( $pdf as item() ) as xs:integer { let $xml:=pdfbox:outline-xml($pdf) return count($xml//bookmark) }pdfitem()xs:integerorg.expkg_zone58.Pdfbox3outline-xmlhttp://www.w3.org/2005/xpath-functionscountfunction pdfbox:number-of-bookmarks($pdf as item()) +The number of outline items defined in $pdfpdfbox:number-of-bookmarksfunction pdfbox:number-of-bookmarks ( $pdf as item() ) as xs:integer { let $xml:=pdfbox:outline-xml($pdf) return count($xml//bookmark) }pdfitem()xs:integerorg.expkg_zone58.Pdfbox3outline-xmlhttp://www.w3.org/2005/xpath-functionscountfunction pdfbox:number-of-bookmarks($pdf as item()) as xs:integer{ let $xml:=pdfbox:outline-xml($pdf) return count($xml//bookmark) @@ -716,7 +716,7 @@ as map(*){ let $data:=convert:integers-to-base64(subsequence($buff,1,$n))=>convert:binary-to-string() return map{"n":$n, "data": $read || $data} } -outline for $pdf as map()*pdfbox:outlinefunction pdfbox:outline ( $pdf as item() ) as map(*)* { (# db:wrapjava some #) { let $outline:= PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getDocumentOutline() return if(exists($outline)) then pdfbox:outline($pdf,PDOutlineItem:getFirstChild($outline)) } }pdfitem()map(*)java:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentCataloghttp://www.w3.org/2005/xpath-functionsexistsorg.expkg_zone58.Pdfbox3outlinejava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItemgetFirstChildfunction pdfbox:outline($pdf as item()) +Return outline for $pdf as map()*pdfbox:outlinefunction pdfbox:outline ( $pdf as item() ) as map(*)* { (# db:wrapjava some #) { let $outline:= PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getDocumentOutline() return if(exists($outline)) then pdfbox:outline($pdf,PDOutlineItem:getFirstChild($outline)) } }pdfitem()map(*)java:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentCataloghttp://www.w3.org/2005/xpath-functionsexistsorg.expkg_zone58.Pdfbox3outlinejava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItemgetFirstChildfunction pdfbox:outline($pdf as item()) as map(*)*{ (# db:wrapjava some #) { let $outline:= @@ -727,12 +727,12 @@ as map(*)*{ then pdfbox:outline($pdf,PDOutlineItem:getFirstChild($outline)) } } -return bookmark info for children of $outlineItem as seq of mapspdfbox:outlinefunction pdfbox:outline ( $pdf as item(),$outlineItem as item()? ) as map(*)* { let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") }pdfitem()outlineItemitem()map(*)org.expkg_zone58.Pdfbox3outline_http://www.w3.org/2005/xpath-functions/mapgetfunction pdfbox:outline($pdf as item(),$outlineItem as item()?) +return bookmark info for children of $outlineItem as seq of mapspdfbox:outlinefunction pdfbox:outline ( $pdf as item(),$outlineItem as item()? ) as map(*)* { let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") }pdfitem()outlineItemitem()map(*)org.expkg_zone58.Pdfbox3outline_http://www.w3.org/2005/xpath-functions/mapgetfunction pdfbox:outline($pdf as item(),$outlineItem as item()?) as map(*)*{ let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") } -BaseX bug 10.7? error if inlined in outlinepdfbox:outline_function pdfbox:outline_ ( $pdf as item(),$outlineItem as item()? ) as map(*) { pdfbox:do-until( map{"list":(),"this":$outlineItem}, function($input,$pos ) { let $bk:= pdfbox:bookmark($input?this,$pdf) let $bk:= if($bk?hasChildren) then let $kids:=pdfbox:outline($pdf,PDOutlineItem:getFirstChild($input?this)) return map:merge(($bk,map:entry("children",$kids))) else $bk return map{ "list": ($input?list, $bk), "this": PDOutlineItem:getNextSibling($input?this)} }, function($output,$pos) { empty($output?this) } ) }pdfitem()outlineItemitem()map(*)org.expkg_zone58.Pdfbox3do-untilorg.expkg_zone58.Pdfbox3bookmarkorg.expkg_zone58.Pdfbox3outlinejava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItemgetFirstChildhttp://www.w3.org/2005/xpath-functions/mapmergehttp://www.w3.org/2005/xpath-functions/mapentryjava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItemgetNextSiblinghttp://www.w3.org/2005/xpath-functionsemptyfunction pdfbox:outline_($pdf as item(),$outlineItem as item()?) +outline helper. BaseX bug 10.7? error if inlined in outlinepdfbox:outline_function pdfbox:outline_ ( $pdf as item(),$outlineItem as item()? ) as map(*) { pdfbox:do-until( map{"list":(),"this":$outlineItem}, function($input,$pos ) { let $bk:= pdfbox:bookmark($input?this,$pdf) let $bk:= if($bk?hasChildren) then let $kids:=pdfbox:outline($pdf,PDOutlineItem:getFirstChild($input?this)) return map:merge(($bk,map:entry("children",$kids))) else $bk return map{ "list": ($input?list, $bk), "this": PDOutlineItem:getNextSibling($input?this)} }, function($output,$pos) { empty($output?this) } ) }pdfitem()outlineItemitem()map(*)org.expkg_zone58.Pdfbox3do-untilorg.expkg_zone58.Pdfbox3bookmarkorg.expkg_zone58.Pdfbox3outlinejava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItemgetFirstChildhttp://www.w3.org/2005/xpath-functions/mapmergehttp://www.w3.org/2005/xpath-functions/mapentryjava:org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItemgetNextSiblinghttp://www.w3.org/2005/xpath-functionsemptyfunction pdfbox:outline_($pdf as item(),$outlineItem as item()?) as map(*){ pdfbox:do-until( @@ -828,7 +828,7 @@ get pagelabels existpdfbox:page- PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getPageLabels() } -label for $page formated as stringpdfbox:label-as-stringfunction pdfbox:label-as-string ( $pagelabels,$page as xs:integer ) as xs:string? { let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) return if(empty($label)) then () else let $start:= PDPageLabelRange:getStart($label) let $style := PDPageLabelRange:getStyle($label) let $prefix:= PDPageLabelRange:getPrefix($label) return string-join(($page, if(empty($style)) then "-" else $style, if(($start eq 1)) then "" else $start, if(exists($prefix)) then '*' || $prefix (:TODO double " :) )) }pagelabelspagexs:integerxs:stringjava:org.apache.pdfbox.pdmodel.common.PDPageLabelsgetPageLabelRangehttp://www.w3.org/2005/xpath-functionsemptyjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStartjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStylejava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetPrefixhttp://www.w3.org/2005/xpath-functionsstring-joinhttp://www.w3.org/2005/xpath-functionsemptyhttp://www.w3.org/2005/xpath-functionsexistsfunction pdfbox:label-as-string($pagelabels,$page as xs:integer) +label for $page formated as string, empty if nonepdfbox:label-as-stringfunction pdfbox:label-as-string ( $pagelabels,$page as xs:integer ) as xs:string? { let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) return if(empty($label)) then () else let $start:= PDPageLabelRange:getStart($label) let $style := PDPageLabelRange:getStyle($label) let $prefix:= PDPageLabelRange:getPrefix($label) return string-join(($page, if(empty($style)) then "-" else $style, if(($start eq 1)) then "" else $start, if(exists($prefix)) then '*' || $prefix (:TODO double " :) )) }pagelabelspagexs:integerxs:stringjava:org.apache.pdfbox.pdmodel.common.PDPageLabelsgetPageLabelRangehttp://www.w3.org/2005/xpath-functionsemptyjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStartjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStylejava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetPrefixhttp://www.w3.org/2005/xpath-functionsstring-joinhttp://www.w3.org/2005/xpath-functionsemptyhttp://www.w3.org/2005/xpath-functionsexistsfunction pdfbox:label-as-string($pagelabels,$page as xs:integer) as xs:string?{ let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) return if(empty($label)) @@ -843,7 +843,7 @@ as xs:string?{ if(exists($prefix)) then '*' || $prefix (:TODO double " :) )) } -sequence of maps for each label inpdfbox:labels-as-mapfunction pdfbox:labels-as-map ( $pdf as item() ) as map(*)* { let $pagelabels:=PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getPageLabels() return $pagelabels !(0 to pdfbox:number-of-pages($pdf)-1) !pdfbox:label-as-map($pagelabels,.) }pdfitem()map(*)java:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentCatalogorg.expkg_zone58.Pdfbox3number-of-pagesorg.expkg_zone58.Pdfbox3label-as-mapfunction pdfbox:labels-as-map($pdf as item()) +sequence of maps for each label/page range defined in $pdfpdfbox:labels-as-mapfunction pdfbox:labels-as-map ( $pdf as item() ) as map(*)* { let $pagelabels:=PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getPageLabels() return $pagelabels !(0 to pdfbox:number-of-pages($pdf)-1) !pdfbox:label-as-map($pagelabels,.) }pdfitem()map(*)java:org.apache.pdfbox.pdmodel.PDDocumentgetDocumentCatalogorg.expkg_zone58.Pdfbox3number-of-pagesorg.expkg_zone58.Pdfbox3label-as-mapfunction pdfbox:labels-as-map($pdf as item()) as map(*)*{ let $pagelabels:=PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getPageLabels() @@ -851,7 +851,7 @@ as map(*)*{ !(0 to pdfbox:number-of-pages($pdf)-1) !pdfbox:label-as-map($pagelabels,.) } -express label/page-range for $page as mappdfbox:label-as-mapfunction pdfbox:label-as-map ( $pagelabels,$page as xs:integer ) as map(*) { let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) return if(empty($label)) then () else map{ "index": $page, "prefix": PDPageLabelRange:getPrefix($label), "start": PDPageLabelRange:getStart($label), "style": PDPageLabelRange:getStyle($label) } }pagelabelspagexs:integermap(*)java:org.apache.pdfbox.pdmodel.common.PDPageLabelsgetPageLabelRangehttp://www.w3.org/2005/xpath-functionsemptyjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetPrefixjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStartjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStylefunction pdfbox:label-as-map($pagelabels,$page as xs:integer) +label/page-range for $page as mappdfbox:label-as-mapfunction pdfbox:label-as-map ( $pagelabels,$page as xs:integer ) as map(*) { let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) return if(empty($label)) then () else map{ "index": $page, "prefix": PDPageLabelRange:getPrefix($label), "start": PDPageLabelRange:getStart($label), "style": PDPageLabelRange:getStyle($label) } }pagelabelspagexs:integermap(*)java:org.apache.pdfbox.pdmodel.common.PDPageLabelsgetPageLabelRangehttp://www.w3.org/2005/xpath-functionsemptyjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetPrefixjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStartjava:org.apache.pdfbox.pdmodel.common.PDPageLabelRangegetStylefunction pdfbox:label-as-map($pagelabels,$page as xs:integer) as map(*) { let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) @@ -874,7 +874,7 @@ as xs:string{ return (# db:checkstrings #) {PDFTextStripper:getText($tStripper,$pdf)} } Return size of $pageNo (zero based) -e.g. [0.0,0.0,168.0,239.52]pdfbox:page-media-boxfunction pdfbox:page-media-box ( $pdf as item(), $pageNo as xs:integer ) as xs:string { PDDocument:getPage($pdf, $pageNo) =>PDPage:getMediaBox() =>PDRectangle:toString() }pdfitem()pageNoxs:integerxs:stringjava:org.apache.pdfbox.pdmodel.PDDocumentgetPagefunction pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer) +e.g. [0.0,0.0,168.0,239.52]pdfbox:page-media-boxfunction pdfbox:page-media-box ( $pdf as item(), $pageNo as xs:integer ) as xs:string { PDDocument:getPage($pdf, $pageNo) =>PDPage:getMediaBox() =>PDRectangle:toString() }pdfitem()pageNoxs:integerxs:stringjava:org.apache.pdfbox.pdmodel.PDDocumentgetPagefunction pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer) as xs:string{ PDDocument:getPage($pdf, $pageNo) =>PDPage:getMediaBox() diff --git a/docs/xqdoc/modules/F000001/xqparse.xml b/docs/xqdoc/modules/F000001/xqparse.xml index 572361d..890a7c5 100644 --- a/docs/xqdoc/modules/F000001/xqparse.xml +++ b/docs/xqdoc/modules/F000001/xqparse.xml @@ -139,9 +139,9 @@ options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) }; -(:~ property access map - keys are property names, - values are sequences of functions to get property from $pdf object +(:~ Defines a map from property names to evaluation method. + Keys are property names, + values are sequences of functions to get property value starting from a $pdf object. :) declare %private variable $pdfbox:property-map:=map{ "#pages": pdfbox:number-of-pages#1, @@ -180,13 +180,13 @@ options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) "labels": pdfbox:labels-as-strings#1 }; -(:~ known property names sorted :) +(:~ Defined property names, sorted :) declare function pdfbox:property-names() as xs:string*{ $pdfbox:property-map=>map:keys()=>sort() }; -(:~ return value of $property for $pdf :) +(:~ Return the value of $property for $pdf :) declare function pdfbox:property($pdf as item(),$property as xs:string) as item()*{ let $fns:= $pdfbox:property-map($property) @@ -204,7 +204,7 @@ options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) pdfbox:report($pdfpaths,pdfbox:property-names()) }; -(:~ summary CSV style info for named properties for $pdfpaths +(:~ summary CSV style info for named $properties for PDFs in $pdfpaths @see https://docs.basex.org/main/CSV_Functions#xquery :) declare function pdfbox:report($pdfpaths as item()*, $properties as xs:string*) @@ -232,14 +232,14 @@ options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) } }; -(:~ convenience function to save report() data to file :) +(:~ Convenience function to save report() data to file :) declare function pdfbox:report-save($data as map(*),$dest as xs:string) as empty-sequence(){ let $opts := map { "format":"xquery", "header":"yes", "separator" : "," } return file:write-text($dest,csv:serialize($data,$opts)) }; -(:~ number of outline items :) +(:~ The number of outline items defined in $pdf :) declare function pdfbox:number-of-bookmarks($pdf as item()) as xs:integer{ let $xml:=pdfbox:outline-xml($pdf) @@ -277,7 +277,7 @@ options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) return map{"n":$n, "data": $read || $data} }; -(:~ outline for $pdf as map()* :) +(:~ Return outline for $pdf as map()* :) declare function pdfbox:outline($pdf as item()) as map(*)*{ (# db:wrapjava some #) { @@ -291,13 +291,13 @@ options.format="bmp jpg png gif" etc, options.scale= 1 is 72 dpi?? :) }; (:~ return bookmark info for children of $outlineItem as seq of maps :) -declare function pdfbox:outline($pdf as item(),$outlineItem as item()?) +declare %private function pdfbox:outline($pdf as item(),$outlineItem as item()?) as map(*)*{ let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") }; -(:~ BaseX bug 10.7? error if inlined in outline :) +(:~ outline helper. BaseX bug 10.7? error if inlined in outline :) declare %private function pdfbox:outline_($pdf as item(),$outlineItem as item()?) as map(*){ pdfbox:do-until( @@ -419,7 +419,7 @@ The returned sequence will contain at MOST as much entries as the document has p =>PDDocumentCatalog:getPageLabels() }; -(:~ label for $page formated as string :) +(:~ label for $page formated as string, empty if none :) declare function pdfbox:label-as-string($pagelabels,$page as xs:integer) as xs:string?{ let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) @@ -436,7 +436,7 @@ The returned sequence will contain at MOST as much entries as the document has p )) }; -(:~ sequence of maps for each label in :) +(:~ sequence of maps for each label/page range defined in $pdf:) declare function pdfbox:labels-as-map($pdf as item()) as map(*)*{ let $pagelabels:=PDDocument:getDocumentCatalog($pdf) @@ -446,7 +446,7 @@ The returned sequence will contain at MOST as much entries as the document has p !pdfbox:label-as-map($pagelabels,.) }; -(:~ express label/page-range for $page as map :) +(:~ label/page-range for $page as map :) declare function pdfbox:label-as-map($pagelabels,$page as xs:integer) as map(*) { @@ -475,7 +475,7 @@ The returned sequence will contain at MOST as much entries as the document has p }; (:~ Return size of $pageNo (zero based) -@result e.g. [0.0,0.0,168.0,239.52] +@return e.g. [0.0,0.0,168.0,239.52] :) declare function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer) as xs:string{ diff --git a/docs/xqdoc/restxq.html b/docs/xqdoc/restxq.html index 972f729..813eace 100644 --- a/docs/xqdoc/restxq.html +++ b/docs/xqdoc/restxq.html @@ -7,4 +7,4 @@ Contents
              1. 1 Summary
              2. 2 Rest Paths

              Summary

              No RESTXQ usage

              Related documents
              ViewDescriptionFormat
              reportIndex of sourcesxhtml
              importsSummary of import usagexhtml
              imports-diagProject wide module imports as html mermaid class diagramhtml5
              imports-diag.mmdProject wide module imports as a mermaid class diagramtext
              annotationsSummary of XQuery annotation usexhtml
              xqdoca.xmlxqDocA run configuration report (XML)xml
              xqdoc-validatevalidate generated xqdoc filesxml

              Rest interface paths

              \ No newline at end of file +   on Wednesday, 4th June 2025

              \ No newline at end of file diff --git a/docs/xqdoc/validation-report.xml b/docs/xqdoc/validation-report.xml index b6a91ea..5e59a2d 100644 --- a/docs/xqdoc/validation-report.xml +++ b/docs/xqdoc/validation-report.xml @@ -1 +1 @@ -valid \ No newline at end of file +valid \ No newline at end of file diff --git a/docs/xqdoc/xqdoca.xml b/docs/xqdoc/xqdoca.xml index 3e91b97..71c6a98 100644 --- a/docs/xqdoc/xqdoca.xml +++ b/docs/xqdoc/xqdoca.xml @@ -1,4 +1,4 @@ -0.9.1docs/xqdoc/ +0.9.1docs/xqdoc/ report restxq imports diff --git a/readme.md b/readme.md index 6dd35d0..bb310bb 100644 --- a/readme.md +++ b/readme.md @@ -1,28 +1,28 @@ # Pdfbox A `BaseX` interface for the `Apache Pdfbox library` version 3. -The [Apache PDFBox® library](https://pdfbox.apache.org/) is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. +> The [Apache PDFBox® library](https://pdfbox.apache.org/) is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. -This interface is packaged in the [Expath](https://docs.basex.org/main/Repository#expath_packaging) format. The package includes the required Pdfbox jars. -A test suite is available and workflow actions run this on BaseX 10.7 and 11.7. +This interface is packaged in the [Expath XAR](https://docs.basex.org/main/Repository#expath_packaging) format. The package includes the required Pdfbox jars. +A test suite is available and workflow actions run these tests against BaseX 10.7 and 11.7. > [!NOTE] ->Currently (v0.1.5) works with BaseX 9.7, but this may change with future versions. +>Currently (v0.3.6) works with BaseX 9.7, but this may change with future versions. ## Features The features focus on extracting information from PDFs rather than creation or editing of PDFs. ### Supported -* read PDF page count. -* read any PDF outline and return as map(s) or XML. -* read pagelabels. -* read page text. -* save pdf page range to a new pdf. -* save image of rendered pdf page. -* open PDF with password -* read XMP metadata -* Page size information -* support for xs:base64Binary in function inputs and outputs to facilitate database and store usage. +* Read PDF page count. +* Read any PDF outline and return as map(s) or XML. +* Read pagelabels. +* Read page text. +* Save pdf page range to a new pdf. +* Create image of rendered pdf page. +* Open PDF with password. +* Read XMP metadata. +* Page size information. +* Datatype xs:base64Binary in function inputs and outputs to facilitate database and store usage. ### Not supported: * creating PDFs with new content @@ -50,7 +50,7 @@ pdfbox:with-pdf("...path/to/pdf.pdf", * `scripts/make-xar.xq` packages the required `jar`s and `xqm` files to a `xar` file in the `dist` folder. -The `package.json` is used/abused as a configuration source. Non standard information is held in the `expkg_zone58` section. This is experimental and may change. +The `package.json` is (ab)used as a configuration source. Non standard information is held in the `expkg_zone58` section. This is experimental and may change. ### Action support -The workflow `ci-basex.yaml` builds and tests the package. This can be used as an action on [github](https://github.com/features/actions), or on a local [gitea](https://docs.gitea.com/usage/actions/overview) installation. +The workflow `ci-basex.yaml` builds and tests the package. This can be used as an action on [github](https://github.com/features/actions), or on a local [gitea](https://docs.gitea.com/usage/actions/overview) or [forgejo](https://forgejo.org/) installation. diff --git a/src/Pdfbox3.xqm b/src/Pdfbox3.xqm index 7afbf8c..7db1add 100644 --- a/src/Pdfbox3.xqm +++ b/src/Pdfbox3.xqm @@ -139,9 +139,9 @@ as xs:base64Binary{ }; -(:~ property access map - keys are property names, - values are sequences of functions to get property from $pdf object +(:~ Defines a map from property names to evaluation method. + Keys are property names, + values are sequences of functions to get property value starting from a $pdf object. :) declare %private variable $pdfbox:property-map:=map{ "#pages": pdfbox:number-of-pages#1, @@ -180,13 +180,13 @@ declare %private variable $pdfbox:property-map:=map{ "labels": pdfbox:labels-as-strings#1 }; -(:~ known property names sorted :) +(:~ Defined property names, sorted :) declare function pdfbox:property-names() as xs:string*{ $pdfbox:property-map=>map:keys()=>sort() }; -(:~ return value of $property for $pdf :) +(:~ Return the value of $property for $pdf :) declare function pdfbox:property($pdf as item(),$property as xs:string) as item()*{ let $fns:= $pdfbox:property-map($property) @@ -204,7 +204,7 @@ as map(*){ pdfbox:report($pdfpaths,pdfbox:property-names()) }; -(:~ summary CSV style info for named properties for $pdfpaths +(:~ summary CSV style info for named $properties for PDFs in $pdfpaths @see https://docs.basex.org/main/CSV_Functions#xquery :) declare function pdfbox:report($pdfpaths as item()*, $properties as xs:string*) @@ -232,14 +232,14 @@ as map(*){ } }; -(:~ convenience function to save report() data to file :) +(:~ Convenience function to save report() data to file :) declare function pdfbox:report-save($data as map(*),$dest as xs:string) as empty-sequence(){ let $opts := map { "format":"xquery", "header":"yes", "separator" : "," } return file:write-text($dest,csv:serialize($data,$opts)) }; -(:~ number of outline items :) +(:~ The number of outline items defined in $pdf :) declare function pdfbox:number-of-bookmarks($pdf as item()) as xs:integer{ let $xml:=pdfbox:outline-xml($pdf) @@ -277,7 +277,7 @@ as map(*){ return map{"n":$n, "data": $read || $data} }; -(:~ outline for $pdf as map()* :) +(:~ Return outline for $pdf as map()* :) declare function pdfbox:outline($pdf as item()) as map(*)*{ (# db:wrapjava some #) { @@ -291,13 +291,13 @@ as map(*)*{ }; (:~ return bookmark info for children of $outlineItem as seq of maps :) -declare function pdfbox:outline($pdf as item(),$outlineItem as item()?) +declare %private function pdfbox:outline($pdf as item(),$outlineItem as item()?) as map(*)*{ let $find as map(*):=pdfbox:outline_($pdf ,$outlineItem) return map:get($find,"list") }; -(:~ BaseX bug 10.7? error if inlined in outline :) +(:~ outline helper. BaseX bug 10.7? error if inlined in outline :) declare %private function pdfbox:outline_($pdf as item(),$outlineItem as item()?) as map(*){ pdfbox:do-until( @@ -401,14 +401,16 @@ as xs:string* =>PDPageLabels:getLabelsByPageIndices() }; -(:~ sequence of label ranges defined in PDF as formatted strings :) -declare function pdfbox:labels-as-strings($pdf as item()) +(:~ sequence of label ranges defined in PDF as formatted strings +@return a custom representation of the labels e.g "0-*Cover,1r,11D" +:) +declare function pdfbox:labels-as-string($pdf as item()) as xs:string{ let $pagelabels:=PDDocument:getDocumentCatalog($pdf) =>PDDocumentCatalog:getPageLabels() return $pagelabels !(0 to pdfbox:number-of-pages($pdf)-1) - !pdfbox:label-as-string($pagelabels,.)=>string-join(",") + !pdfbox:label-as-string($pagelabels,.)=>string-join(" ") }; @@ -419,7 +421,7 @@ declare function pdfbox:page-labels($pdf) =>PDDocumentCatalog:getPageLabels() }; -(:~ label for $page formated as string :) +(:~ label for $page formated as string, empty if none :) declare function pdfbox:label-as-string($pagelabels,$page as xs:integer) as xs:string?{ let $label:=PDPageLabels:getPageLabelRange($pagelabels,$page) @@ -436,7 +438,7 @@ as xs:string?{ )) }; -(:~ sequence of maps for each label in :) +(:~ sequence of maps for each label/page range defined in $pdf:) declare function pdfbox:labels-as-map($pdf as item()) as map(*)*{ let $pagelabels:=PDDocument:getDocumentCatalog($pdf) @@ -446,7 +448,7 @@ as map(*)*{ !pdfbox:label-as-map($pagelabels,.) }; -(:~ express label/page-range for $page as map :) +(:~ label/page-range for $page as map :) declare function pdfbox:label-as-map($pagelabels,$page as xs:integer) as map(*) { @@ -475,7 +477,7 @@ as xs:string{ }; (:~ Return size of $pageNo (zero based) -@result e.g. [0.0,0.0,168.0,239.52] +@return e.g. [0.0,0.0,168.0,239.52] :) declare function pdfbox:page-media-box($pdf as item(), $pageNo as xs:integer) as xs:string{ diff --git a/tests/test.xqm b/tests/test.xqm index 61d9f91..4364e0e 100644 --- a/tests/test.xqm +++ b/tests/test.xqm @@ -51,7 +51,7 @@ declare %unit:test function test:labels(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") - let $labels:=pdfbox:labels($pdf) + let $labels:=pdfbox:labels-by-page($pdf) return ( unit:assert-equals(count($labels),pdfbox:number-of-pages($pdf)), unit:assert($labels[1]="i") , @@ -125,7 +125,7 @@ declare %unit:test function test:save(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") let $dest:=file:create-temp-file("test-save",".pdf") - let $savedPath:=pdfbox:save($pdf, $dest) + let $savedPath:=pdfbox:pdf-save($pdf, $dest) return unit:assert-equals($savedPath, $dest) }; @@ -133,7 +133,7 @@ function test:save(){ declare %unit:test function test:property(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") - let $pages:=pdfbox:property($pdf, "pageCount") + let $pages:=pdfbox:property($pdf, "#pages") return unit:assert(true()) }; @@ -141,8 +141,8 @@ function test:property(){ declare %unit:test("expected", "pdfbox:property") function test:property-bad(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") - let $title:=pdfbox:property($pdf, "badname") - return unit:assert(exists($title)) + let $p:=pdfbox:property($pdf, "badname") + return unit:assert(exists($p)) }; (:~ Test for pdfbox:property-names function :) declare %unit:test @@ -164,16 +164,16 @@ function test:report(){ declare %unit:test function test:hasOutline(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") - let $hasOutline:=pdfbox:hasOutline($pdf) - return unit:assert(not($hasOutline)) + let $marks:=pdfbox:number-of-bookmarks($pdf) + return unit:assert-equals($marks,0) }; (:~ Test for pdfbox:hasLabels function :) declare %unit:test function test:hasLabels(){ let $pdf:=test:open("samples.pdf/BaseX100.pdf") - let $hasLabels:=pdfbox:hasLabels($pdf) - return unit:assert($hasLabels) + let $labels:=pdfbox:number-of-labels($pdf) + return unit:assert-equals($labels,302) };