142 lines
5 KiB
Text
142 lines
5 KiB
Text
(: Abstract syntax tree
|
|
@author Andy Bunce
|
|
:)
|
|
module namespace ast="lsp/ast";
|
|
(: import module namespace p="xq4" at "xq4.xqm"; :)
|
|
import module namespace xq4="java:quodatum.parser.xq4";
|
|
|
|
(:~
 : Parse XQuery source text and return its syntax tree as an element.
 :
 : Entries of $opts that the body reads:
 :   uri      - appended to the label of the parse timing message
 :   abstract - when true, the parse tree is passed through ast:flatten
 :   position - when true, the tree is passed through ast:annotate-with-positions
 :
 : Each stage that runs is wrapped in prof:time with its own label.
 :
 : @param $text source text handed to xq4:parseModule
 : @param $opts option map, defaults to the empty map
 : @return the parse tree after the requested stages
 :)
declare function ast:build($text as xs:string,$opts as map(*):={})
as element(*){
  let $parsed := prof:time(xq4:parseModule($text), "⏱️ parseModule " || $opts?uri)
  let $shaped :=
    if ($opts?abstract)
    then prof:time(ast:flatten($parsed), "⏱️ abstract ")
    else $parsed
  return
    if ($opts?position)
    then prof:time(ast:annotate-with-positions($shaped), "⏱️ position ")
    else $shaped
};
|
|
|
|
(:~
 : Concrete to abstract: simplify the tree by omitting every element that has
 : exactly one child element.
 :
 : Such a wrapper is replaced by the flattened form of that child, so the
 : wrapper's attributes and any text nodes beside the child are discarded.
 : Any other element is rebuilt with the same name and attributes, its child
 : elements flattened recursively and its other child nodes copied as they are.
 :
 : @param $input element to simplify
 : @return the simplified element
 :)
declare function ast:flatten($input as element()) as element() {
  let $kids := $input/*
  return
    if (count($kids) ne 1)
    then
      element { node-name($input) } {
        $input/@*,
        $input/node() ! (
          typeswitch (.)
            case element() return ast:flatten(.)
            default return .
        )
      }
    else ast:flatten($kids)
};
|
|
|
|
(:~
 : Walk $nodes in order and annotate every element with 1-based character
 : offsets into the concatenated text of the tree.
 :
 : Each element is rebuilt with
 :   start - offset of its first character
 :   end   - offset of its last character (inclusive); an element that
 :           contains no text reports end = start
 : Attributes already present on the element are kept, except existing
 : start/end attributes, which are replaced.
 :
 : Only text nodes consume characters. An element without text therefore does
 : not shift the nodes that follow it, and comments or processing instructions
 : are copied through without contributing to any offset.
 :
 : The offset is threaded through a single left fold, so an element's width is
 : taken from the offset returned by its children instead of re-serialising
 : them, and recursion depth follows tree depth rather than sibling count.
 :
 : @param $nodes nodes to process
 : @param $start-pos offset assigned to the first character found in $nodes
 : @return first item: the offset just past the last character consumed;
 :         remaining items: the processed nodes in their original order
 :)
declare function ast:add-positions($nodes as node()*, $start-pos as xs:integer) as item()+ {
  fold-left($nodes, $start-pos, function($acc, $node) {
    let $pos := head($acc)
    let $done := tail($acc)
    return
      typeswitch ($node)
        case element() return
          let $inner := ast:add-positions($node/node(), $pos)
          let $next := head($inner)
          return (
            $next,
            $done,
            element { node-name($node) } {
              attribute start { $pos },
              (: $next - 1 is the last character used; clamp for empty elements :)
              attribute end { max(($pos, $next - 1)) },
              $node/@* except $node/(@start | @end),
              tail($inner)
            }
          )
        case text() return
          ($pos + string-length($node), $done, $node)
        default return
          ($pos, $done, $node)
  })
};
|
|
|
|
(:~
 : Return a copy of $xml in which elements carry start/end character offsets,
 : counting from 1.
 :
 : ast:add-positions returns the next free offset followed by the processed
 : nodes; the second item of that sequence is the annotated copy of $xml.
 :
 : @param $xml root element to annotate
 : @return the annotated element
 :)
declare function ast:annotate-with-positions($xml as element()) as element() {
  ast:add-positions($xml, 1)[2]
};
|
|
|
|
(:-------reporting------------------------------------:)
|
|
|
|
(:~
 : Summarise the size of a tree.
 :
 : @param $el root element of the tree
 : @return a map with
 :   total-elements - number of descendant elements ($el itself is not counted)
 :   max-depth      - result of ast:max-node-depth for $el
 :   char-count     - length of the string value of $el
 :)
declare function ast:report($el as element(*)) {
  map {
    "total-elements": count($el/descendant::*),
    "max-depth": $el => ast:max-node-depth(),
    "char-count": string-length(string($el))
  }
};
|
|
|
|
(:~
 : Greatest element nesting depth found below and including $nodes.
 :
 : An element contributes one level on top of the depth of its children; any
 : other node kind contributes only the depth of its children.
 :
 : @param $nodes nodes to measure
 : @return the maximum depth, or 0 when $nodes is empty
 :)
declare function ast:max-depth($nodes as node()*) as xs:integer {
  (: the leading 0 covers the empty case and never exceeds a real depth :)
  max((
    0,
    $nodes ! (
      let $below := ast:max-depth(./node())
      return if (. instance of element()) then 1 + $below else $below
    )
  ))
};
|
|
|
|
(:~
 : Depth of the tree rooted at a single node; delegates to ast:max-depth.
 :
 : @param $root node to measure
 : @return the value ast:max-depth returns for $root
 :)
declare function ast:max-node-depth($root as node()) as xs:integer {
  $root => ast:max-depth()
};
|