[add] github workflow
This commit is contained in:
parent
dd7b0c1d83
commit
e987fcf8ba
30
.github/workflows/ci.yaml
vendored
Normal file
30
.github/workflows/ci.yaml
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
name: Run BaseX Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
workflow_dispatch: # Enables manual trigger
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Set up Java
|
||||||
|
uses: actions/setup-java@v2
|
||||||
|
with:
|
||||||
|
java-version: '11'
|
||||||
|
distribution: 'temurin'
|
||||||
|
|
||||||
|
- name: Install BaseX
|
||||||
|
run: |
|
||||||
|
wget https://files.basex.org/releases/10.7/BaseX107.zip
|
||||||
|
unzip BaseX107.zip -d basex
|
||||||
|
|
||||||
|
- name: Run BaseX Tests
|
||||||
|
run: |
|
||||||
|
./basex/bin/basex -t .
|
BIN
lib/pdfbox-3.0.2/commons-logging-1.3.1.jar
Normal file
BIN
lib/pdfbox-3.0.2/commons-logging-1.3.1.jar
Normal file
Binary file not shown.
BIN
lib/pdfbox-3.0.2/fontbox-3.0.2.jar
Normal file
BIN
lib/pdfbox-3.0.2/fontbox-3.0.2.jar
Normal file
Binary file not shown.
BIN
lib/pdfbox-3.0.2/pdfbox-3.0.2.jar
Normal file
BIN
lib/pdfbox-3.0.2/pdfbox-3.0.2.jar
Normal file
Binary file not shown.
BIN
lib/pdfbox-3.0.2/pdfbox-io-3.0.2.jar
Normal file
BIN
lib/pdfbox-3.0.2/pdfbox-io-3.0.2.jar
Normal file
Binary file not shown.
@ -7,7 +7,7 @@
|
|||||||
"doc": "docs"
|
"doc": "docs"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "basex -t ."
|
"test": "%BASEX10%/bin/basex -t ."
|
||||||
},
|
},
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"pdf",
|
"pdf",
|
||||||
|
BIN
samples.pdf/Legal_RAG_Hallucinations.pdf
Normal file
BIN
samples.pdf/Legal_RAG_Hallucinations.pdf
Normal file
Binary file not shown.
@ -4,4 +4,5 @@
|
|||||||
* [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf)
|
* [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf)
|
||||||
* [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf)
|
* [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf)
|
||||||
* [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers).
|
* [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers).
|
||||||
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdfpdf)
|
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdf)
|
||||||
|
* [Legal RAG Hallucinations](https://law.stanford.edu/wp-content/uploads/2024/05/Legal_RAG_Hallucinations.pdf)
|
||||||
|
@ -35,7 +35,7 @@ as element(page){
|
|||||||
return <page index="{ $page }">{ $found, $line1 }</page>
|
return <page index="{ $page }">{ $found, $line1 }</page>
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ empty or attributes created by matching $style with $line1 :)
|
(:~ attributes created by matching $style with $line1 or empty :)
|
||||||
declare function pdfscrape:line-report($style as xs:string, $line1 as xs:string)
|
declare function pdfscrape:line-report($style as xs:string, $line1 as xs:string)
|
||||||
as attribute(*)*{
|
as attribute(*)*{
|
||||||
if(matches($line1,$pdfscrape:pats?($style)))
|
if(matches($line1,$pdfscrape:pats?($style)))
|
||||||
@ -52,10 +52,12 @@ as map(*) {
|
|||||||
$pages[@number]!map:entry(string(@number),string(@index))
|
$pages[@number]!map:entry(string(@number),string(@index))
|
||||||
=>map:merge(map{"duplicates":"combine"})
|
=>map:merge(map{"duplicates":"combine"})
|
||||||
};
|
};
|
||||||
|
|
||||||
(:~ %match
|
(:~ %match
|
||||||
$l page labels
|
$l page labels
|
||||||
:)
|
:)
|
||||||
declare function pdfscrape:score($l as xs:string*,$report as element(page)*)
|
declare function pdfscrape:score($l as xs:string*,
|
||||||
|
$report as element(page)*)
|
||||||
{
|
{
|
||||||
let $s:=$report!(if(@number)then string(@number) else "")
|
let $s:=$report!(if(@number)then string(@number) else "")
|
||||||
let $match:= for-each-pair($l,$s,function($l,$s){if($s eq "")then 0 else if ($s eq $l)then 1 else -1})
|
let $match:= for-each-pair($l,$s,function($l,$s){if($s eq "")then 0 else if ($s eq $l)then 1 else -1})
|
||||||
@ -76,3 +78,7 @@ as xs:integer{
|
|||||||
=> array:head()
|
=> array:head()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
declare function pdfscrape:characters($str as xs:string)
|
||||||
|
{
|
||||||
|
|
||||||
|
};
|
||||||
|
@ -19,6 +19,5 @@ $samples?world=>file:resolve-path($base)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
let $doc:=pdfbox:open($PDF)
|
|
||||||
|
|
||||||
return pdfbox:information($doc)
|
pdfbox:report($PDF)
|
Loading…
Reference in New Issue
Block a user