basex-lsp/webapp/lsp/ast/ast.xqm
2025-10-30 12:51:14 +00:00

142 lines
5 KiB
Text

(: Abstract syntax tree
@author Andy Bunce
:)
module namespace ast="lsp/ast";
(: import module namespace p="xq4" at "xq4.xqm"; :)
import module namespace xq4="java:quodatum.parser.xq4";
(:~
 : Parse module source text with xq4:parseModule and return the resulting tree.
 : Each stage is wrapped in prof:time so its duration is reported.
 : @param $text  source text handed to the parser
 : @param $opts  options map; the keys read here are
 :                 uri      - appended to the label of the parse timing
 :                 abstract - when true, the tree is simplified with ast:flatten
 :                 position - when true, start/end attributes are added with
 :                            ast:annotate-with-positions
 : @return root element of the (optionally simplified and annotated) tree
 :)
declare function ast:build($text as xs:string,$opts as map(*):={})
as element(*){
  let $parsed := xq4:parseModule($text)=>prof:time("⏱️ parseModule " || $opts?uri)
  (: an absent option yields the empty sequence, which is treated as false :)
  let $shaped :=
    if ($opts?abstract)
    then ast:flatten($parsed)=>prof:time("⏱️ abstract ")
    else $parsed
  return
    if ($opts?position)
    then ast:annotate-with-positions($shaped)=>prof:time("⏱️ position ")
    else $shaped
};
(:~
 : Concrete to abstract: simplify the tree by replacing every element that has
 : exactly one child element with the (flattened) child.
 : Note: when such a wrapper is collapsed, its own attributes and any
 : non-element children (text, comments, ...) are discarded with it.
 : @param $input element to simplify
 : @return simplified copy of $input
 :)
declare function ast:flatten($input as element()) as element() {
  let $kids := $input/*
  return
    if (count($kids) ne 1)
    then
      (: keep this element: copy attributes, recurse into element children :)
      element { node-name($input) } {
        $input/@*,
        $input/node() ! (
          if (. instance of element()) then ast:flatten(.) else .
        )
      }
    else
      (: single child element: skip this wrapper :)
      ast:flatten($kids)
};
(:~
 : Annotate every element in $nodes with the character range it covers.
 : Positions are counted in characters of the text nodes, in document order,
 : beginning at $start-pos for the first character of $nodes.
 :
 : Each element is rebuilt with two extra attributes:
 :   start - position of its first character
 :   end   - position of its last character (inclusive); an element without
 :           any text gets end = start
 : Existing attributes are kept, except any already named start or end.
 : Text nodes advance the position by their length; all other node kinds
 : (comments, processing instructions, ...) are copied without advancing it.
 :
 : @param $nodes     sibling nodes to process
 : @param $start-pos position of the first character of $nodes
 : @return a sequence whose first item is the position following the last
 :         character of $nodes, followed by the processed nodes
 :)
declare function ast:add-positions($nodes as node()*, $start-pos as xs:integer) as item()+ {
  if (empty($nodes)) then $start-pos
  else
    let $head := head($nodes)
    (: $processed is the pair (position after $head, rewritten $head) :)
    let $processed :=
      typeswitch ($head)
        case element() return
          let $child-result := ast:add-positions($head/node(), $start-pos)
          (: the threaded position is the single source of truth, so parent
             and child ranges always agree :)
          let $next-pos := head($child-result)
          return (
            $next-pos,
            element { node-name($head) } {
              $head/@*[not(name() = ("start", "end"))],
              attribute start { $start-pos },
              attribute end { max(($start-pos, $next-pos - 1)) },
              tail($child-result)
            }
          )
        case text() return
          ($start-pos + string-length($head), $head)
        default return
          ($start-pos, $head)
    (: an empty element consumes no characters, so its following sibling
       starts at the same position :)
    let $tail-result := ast:add-positions(tail($nodes), head($processed))
    return (
      head($tail-result),
      $processed[2],
      tail($tail-result)
    )
};
(:~
 : Return a copy of $xml in which elements carry start/end attributes,
 : as produced by ast:add-positions with positions counted from 1.
 : @param $xml root element to annotate
 : @return annotated copy of $xml
 :)
declare function ast:annotate-with-positions($xml as element()) as element() {
  (: item 1 of the result is the final position; item 2 is the rebuilt root :)
  subsequence(ast:add-positions($xml, 1), 2, 1)
};
(:-------reporting------------------------------------:)
(:~
 : Summarise a tree as a map of simple metrics.
 : @param $el root element to measure
 : @return map with the keys
 :           total-elements - number of descendant elements of $el
 :           max-depth      - result of ast:max-node-depth for $el
 :           char-count     - length of the string value of $el
 :)
declare function ast:report($el as element(*)) {
  let $descendants := count($el//element())
  let $depth := ast:max-node-depth($el)
  let $chars := string-length($el)
  return {
    "total-elements": $descendants,
    "max-depth": $depth,
    "char-count": $chars
  }
};
(:~
 : Greatest element nesting depth found in $nodes.
 : Every element adds one level; other node kinds add none.
 : @param $nodes nodes to inspect
 : @return the maximum depth, or 0 when $nodes is empty
 :)
declare function ast:max-depth($nodes as node()*) as xs:integer {
  max((
    (: the leading 0 covers the empty case; all depths are non-negative :)
    0,
    $nodes ! (
      ast:max-depth(./node()) + (if (. instance of element()) then 1 else 0)
    )
  ))
};
(:~
 : Element nesting depth of the tree rooted at a single node.
 : @param $root node at the top of the tree
 : @return depth as computed by ast:max-depth
 :)
declare function ast:max-node-depth($root as node()) as xs:integer {
  $root => ast:max-depth()
};