1
0

[add] github workflow

This commit is contained in:
Andy Bunce 2025-01-03 16:03:13 +00:00
parent dd7b0c1d83
commit e987fcf8ba
10 changed files with 42 additions and 6 deletions

30
.github/workflows/ci.yaml vendored Normal file
View File

@ -0,0 +1,30 @@
# CI workflow: installs Java and BaseX 10.7, then runs the repository's
# BaseX unit tests (`basex -t .`) for pull requests targeting `main`.
name: Run BaseX Tests
on:
  pull_request:
    branches:
      - main
  workflow_dispatch: # Enables manual trigger
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      - name: Install BaseX
        # Fetch the release archive over HTTPS so the download cannot be
        # tampered with in transit.
        # NOTE(review): if the archive already contains a top-level `basex/`
        # directory, `-d basex` puts the launcher at basex/basex/bin/basex --
        # confirm the layout matches the path used in the next step.
        run: |
          wget https://files.basex.org/releases/10.7/BaseX107.zip
          unzip BaseX107.zip -d basex
      - name: Run BaseX Tests
        # `-t .` runs the unit tests found under the repository root.
        run: |
          ./basex/bin/basex -t .

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -7,7 +7,7 @@
"doc": "docs" "doc": "docs"
}, },
"scripts": { "scripts": {
"test": "basex -t ." "test": "%BASEX10%/bin/basex -t ."
}, },
"keywords": [ "keywords": [
"pdf", "pdf",

Binary file not shown.

View File

@ -4,4 +4,5 @@
* [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf) * [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf)
* [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf) * [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf)
* [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers). * [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers).
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdfpdf) * [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdf)
* [Legal RAG Hallucinations](https://law.stanford.edu/wp-content/uploads/2024/05/Legal_RAG_Hallucinations.pdf)

View File

@ -35,7 +35,7 @@ as element(page){
return <page index="{ $page }">{ $found, $line1 }</page> return <page index="{ $page }">{ $found, $line1 }</page>
}; };
(:~ empty or attributes created by matching $style with $line1 :) (:~ attributes created by matching $style with $line1 or empty :)
declare function pdfscrape:line-report($style as xs:string, $line1 as xs:string) declare function pdfscrape:line-report($style as xs:string, $line1 as xs:string)
as attribute(*)*{ as attribute(*)*{
if(matches($line1,$pdfscrape:pats?($style))) if(matches($line1,$pdfscrape:pats?($style)))
@ -52,10 +52,12 @@ as map(*) {
$pages[@number]!map:entry(string(@number),string(@index)) $pages[@number]!map:entry(string(@number),string(@index))
=>map:merge(map{"duplicates":"combine"}) =>map:merge(map{"duplicates":"combine"})
}; };
(:~ %match (:~ %match
$l page labels $l page labels
:) :)
declare function pdfscrape:score($l as xs:string*,$report as element(page)*) declare function pdfscrape:score($l as xs:string*,
$report as element(page)*)
{ {
let $s:=$report!(if(@number)then string(@number) else "") let $s:=$report!(if(@number)then string(@number) else "")
let $match:= for-each-pair($l,$s,function($l,$s){if($s eq "")then 0 else if ($s eq $l)then 1 else -1}) let $match:= for-each-pair($l,$s,function($l,$s){if($s eq "")then 0 else if ($s eq $l)then 1 else -1})
@ -76,3 +78,7 @@ as xs:integer{
=> array:head() => array:head()
}; };
(:~
 : Placeholder: not yet implemented.
 : The body is empty, so the function currently ignores $str and
 : returns the empty sequence.
 : @param $str input string (unused for now)
 : @return the empty sequence
 :)
declare function pdfscrape:characters($str as xs:string)
{
  ()
};

View File

@ -19,6 +19,5 @@ $samples?world=>file:resolve-path($base)
let $doc:=pdfbox:open($PDF)
return pdfbox:information($doc) pdfbox:report($PDF)