[add] github workflow
This commit is contained in:
parent
dd7b0c1d83
commit
e987fcf8ba
30
.github/workflows/ci.yaml
vendored
Normal file
30
.github/workflows/ci.yaml
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
name: Run BaseX Tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch: # Enables manual trigger
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Java
|
||||
uses: actions/setup-java@v2
|
||||
with:
|
||||
java-version: '11'
|
||||
distribution: 'temurin'
|
||||
|
||||
- name: Install BaseX
|
||||
run: |
|
||||
wget http://files.basex.org/releases/10.7/BaseX107.zip
|
||||
unzip BaseX107.zip -d basex
|
||||
|
||||
- name: Run BaseX Tests
|
||||
run: |
|
||||
./basex/bin/basex -t .
|
BIN
lib/pdfbox-3.0.2/commons-logging-1.3.1.jar
Normal file
BIN
lib/pdfbox-3.0.2/commons-logging-1.3.1.jar
Normal file
Binary file not shown.
BIN
lib/pdfbox-3.0.2/fontbox-3.0.2.jar
Normal file
BIN
lib/pdfbox-3.0.2/fontbox-3.0.2.jar
Normal file
Binary file not shown.
BIN
lib/pdfbox-3.0.2/pdfbox-3.0.2.jar
Normal file
BIN
lib/pdfbox-3.0.2/pdfbox-3.0.2.jar
Normal file
Binary file not shown.
BIN
lib/pdfbox-3.0.2/pdfbox-io-3.0.2.jar
Normal file
BIN
lib/pdfbox-3.0.2/pdfbox-io-3.0.2.jar
Normal file
Binary file not shown.
@ -7,7 +7,7 @@
|
||||
"doc": "docs"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "basex -t ."
|
||||
"test": "%BASEX10%/bin/basex -t ."
|
||||
},
|
||||
"keywords": [
|
||||
"pdf",
|
||||
|
BIN
samples.pdf/Legal_RAG_Hallucinations.pdf
Normal file
BIN
samples.pdf/Legal_RAG_Hallucinations.pdf
Normal file
Binary file not shown.
@ -4,4 +4,5 @@
|
||||
* [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf)
|
||||
* [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf)
|
||||
* [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers).
|
||||
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdfpdf)
|
||||
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdf)
|
||||
* [Legal RAG Hallucinations](https://law.stanford.edu/wp-content/uploads/2024/05/Legal_RAG_Hallucinations.pdf)
|
||||
|
@ -35,7 +35,7 @@ as element(page){
|
||||
return <page index="{ $page }">{ $found, $line1 }</page>
|
||||
};
|
||||
|
||||
(:~ empty or attributes created by matching $style with $line1 :)
|
||||
(:~ attributes created by matching $style with $line1 or empty :)
|
||||
declare function pdfscrape:line-report($style as xs:string, $line1 as xs:string)
|
||||
as attribute(*)*{
|
||||
if(matches($line1,$pdfscrape:pats?($style)))
|
||||
@ -52,10 +52,12 @@ as map(*) {
|
||||
$pages[@number]!map:entry(string(@number),string(@index))
|
||||
=>map:merge(map{"duplicates":"combine"})
|
||||
};
|
||||
|
||||
(:~ %match
|
||||
$l page labels
|
||||
:)
|
||||
declare function pdfscrape:score($l as xs:string*,$report as element(page)*)
|
||||
declare function pdfscrape:score($l as xs:string*,
|
||||
$report as element(page)*)
|
||||
{
|
||||
let $s:=$report!(if(@number)then string(@number) else "")
|
||||
let $match:= for-each-pair($l,$s,function($l,$s){if($s eq "")then 0 else if ($s eq $l)then 1 else -1})
|
||||
@ -76,3 +78,7 @@ as xs:integer{
|
||||
=> array:head()
|
||||
};
|
||||
|
||||
declare function pdfscrape:characters($str as xs:string)
|
||||
{
|
||||
|
||||
};
|
||||
|
@ -19,6 +19,5 @@ $samples?world=>file:resolve-path($base)
|
||||
|
||||
|
||||
|
||||
let $doc:=pdfbox:open($PDF)
|
||||
|
||||
return pdfbox:information($doc)
|
||||
pdfbox:report($PDF)
|
Loading…
Reference in New Issue
Block a user