1
0

[add] github workflow

This commit is contained in:
Andy Bunce 2025-01-03 16:03:13 +00:00
parent dd7b0c1d83
commit e987fcf8ba
10 changed files with 42 additions and 6 deletions

30
.github/workflows/ci.yaml vendored Normal file
View File

@ -0,0 +1,30 @@
# CI workflow: downloads BaseX 10.7 and runs the repository's XQuery unit
# tests (`basex -t .`) for pull requests targeting main, or on manual dispatch.
name: Run BaseX Tests
on:
  pull_request:
    branches:
      - main
  workflow_dispatch: # Enables manual trigger
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      - name: Install BaseX
        run: |
          # Fetch over HTTPS so the release archive cannot be tampered with in transit.
          wget https://files.basex.org/releases/10.7/BaseX107.zip
          unzip -q BaseX107.zip -d basex
      - name: Run BaseX Tests
        run: |
          # NOTE(review): the release zip appears to unpack into its own top-level
          # basex/ folder, which would put the launcher at basex/basex/bin/basex.
          # Locate it instead of hard-coding either layout - confirm and simplify.
          BASEX_BIN="$(find basex -type f -path '*/bin/basex' | head -n 1)"
          "$BASEX_BIN" -t .

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -7,7 +7,7 @@
"doc": "docs"
},
"scripts": {
"test": "basex -t ."
"test": "%BASEX10%/bin/basex -t ."
},
"keywords": [
"pdf",

Binary file not shown.

View File

@ -4,4 +4,5 @@
* [BaseX100.pdf](https://files.basex.org/releases/10.0/BaseX100.pdf)
* [icelandic-dictionary.pdf](http://css4.pub/2015/icelandic/dictionary.pdf)
* [page-numbers.pdf](https://www.w3.org/WAI/WCAG22/working-examples/pdf-page-numbers/page-numbers)
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdfpdf)
* [Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans](https://www.lse.ac.uk/News/News-Assets/PDFs/2021/Sentience-in-Cephalopod-Molluscs-and-Decapod-Crustaceans-Final-Report-November-2021.pdf)
* [Legal RAG Hallucinations](https://law.stanford.edu/wp-content/uploads/2024/05/Legal_RAG_Hallucinations.pdf)

View File

@ -35,7 +35,7 @@ as element(page){
return <page index="{ $page }">{ $found, $line1 }</page>
};
(:~ empty or attributes created by matching $style with $line1 :)
(:~ attributes created by matching $style with $line1 or empty :)
declare function pdfscrape:line-report($style as xs:string, $line1 as xs:string)
as attribute(*)*{
if(matches($line1,$pdfscrape:pats?($style)))
@ -52,10 +52,12 @@ as map(*) {
$pages[@number]!map:entry(string(@number),string(@index))
=>map:merge(map{"duplicates":"combine"})
};
(:~ %match
$l page labels
:)
declare function pdfscrape:score($l as xs:string*,$report as element(page)*)
declare function pdfscrape:score($l as xs:string*,
$report as element(page)*)
{
let $s:=$report!(if(@number)then string(@number) else "")
let $match:= for-each-pair($l,$s,function($l,$s){if($s eq "")then 0 else if ($s eq $l)then 1 else -1})
@ -76,3 +78,7 @@ as xs:integer{
=> array:head()
};
(:~
 : Placeholder with no implementation yet: the body is empty, so every call
 : yields the empty sequence.
 : @param $str input string (currently unused)
 : @return the empty sequence
 :)
declare function pdfscrape:characters(
  $str as xs:string
) {
  ()
};

View File

@ -19,6 +19,5 @@ $samples?world=>file:resolve-path($base)
let $doc:=pdfbox:open($PDF)
return pdfbox:information($doc)
pdfbox:report($PDF)