(:~
 : Abstract syntax tree helpers: build a parse tree from XQuery source text,
 : optionally simplify it and annotate it with character positions.
 : @author Andy Bunce
 :)
module namespace ast="lsp/ast";

(: import module namespace p="xq4" at "xq4.xqm"; :)
import module namespace xq4="java:quodatum.parser.xq4";

(:~
 : Parse $text and return the resulting tree.
 :
 : Recognised keys in $opts:
 :   uri      - only used in the label of the parse timing message
 :   abstract - when true, the tree is simplified with ast:flatten
 :   position - when true, elements get start/end attributes
 :
 : Positions are computed on the concrete tree, BEFORE flattening:
 : ast:flatten discards the text children of every wrapper it collapses, and
 : ast:add-positions derives offsets from text lengths, so annotating a
 : flattened tree would undercount any dropped text. ast:flatten copies
 : attributes, so start/end survive on the elements that remain.
 :
 : @param $text source text handed to xq4:parseModule
 : @param $opts option map, defaults to the empty map
 : @return root element of the (optionally annotated / flattened) tree
 :)
declare function ast:build($text as xs:string, $opts as map(*) := {}) as element(*) {
  let $xml := xq4:parseModule($text) => prof:time("⏱️ parseModule " || $opts?uri)
  let $res :=
    if ($opts?position)
    then ast:annotate-with-positions($xml) => prof:time("⏱️ position ")
    else $xml
  let $res :=
    if ($opts?abstract)
    then ast:flatten($res) => prof:time("⏱️ abstract ")
    else $res
  return $res
};

(:~
 : Concrete to abstract: simplify by omitting elements that have exactly one
 : child element.
 :
 : Such a wrapper is replaced by its (recursively flattened) child; the
 : wrapper's own attributes and any text/comment children are not carried over.
 : Every other element is rebuilt with its attributes, its non-element
 : children unchanged and its element children flattened.
 :
 : @param $input element to simplify
 : @return the simplified element
 :)
declare function ast:flatten($input as element()) as element() {
  if (1 = count($input/*))
  then ast:flatten($input/*)
  else element { node-name($input) } {
    $input/@*,
    for $child in $input/node()
    return
      if ($child instance of element())
      then ast:flatten($child)
      else $child
  }
};

(:~
 : Walk $nodes in order, annotating every element with start/end attributes.
 :
 : The running offset advances only by the string length of text nodes, so a
 : zero-length element does not shift the siblings that follow it. Nodes that
 : are neither elements nor text are copied through without moving the offset.
 :
 : For an element, start is the offset at which it begins and end is the
 : offset of its last character (inclusive). An element containing no text
 : gets end = start. Existing attributes are kept, except start/end which are
 : replaced.
 :
 : Siblings are processed with fold-left, so recursion happens only when
 : descending into an element's children.
 :
 : @param $nodes nodes to process
 : @param $start-pos offset assigned to the first character of $nodes
 : @return a sequence whose first item is the offset following the last
 :         character consumed, followed by the processed nodes
 :)
declare function ast:add-positions($nodes as node()*, $start-pos as xs:integer) as item()+ {
  fold-left($nodes, $start-pos, function($acc, $node) {
    (: $acc = (current offset, nodes emitted so far...) :)
    let $pos := head($acc)
    let $done := tail($acc)
    return
      typeswitch ($node)
        case element() return
          let $inner := ast:add-positions($node/node(), $pos)
          let $next := head($inner)
          return (
            $next,
            $done,
            element { node-name($node) } {
              $node/@*[not(name() = ("start", "end"))],
              attribute start { $pos },
              (: $next - 1 is the last character; clamp for empty elements :)
              attribute end { max(($pos, $next - 1)) },
              tail($inner)
            }
          )
        case text() return
          ($pos + string-length($node), $done, $node)
        default return
          ($acc, $node)
  })
};

(:~
 : Return a copy of $xml in which every element carries start/end attributes,
 : counting from offset 1.
 :
 : @param $xml root element to annotate
 : @return the annotated copy
 :)
declare function ast:annotate-with-positions($xml as element()) as element() {
  (: item 1 of the result is the final offset, item 2 the annotated root :)
  let $result := ast:add-positions($xml, 1)
  return $result[2]
};

(:-------reporting------------------------------------:)

(:~
 : Summary figures for a tree.
 :
 : @param $el root element
 : @return map with
 :   total-elements - number of descendant elements ($el itself not counted)
 :   max-depth      - result of ast:max-node-depth for $el
 :   char-count     - length of the string value of $el
 :)
declare function ast:report($el as element(*)) {
  {
    "total-elements": count($el//element()),
    "max-depth": ast:max-node-depth($el),
    "char-count": string-length($el)
  }
};

(:~
 : Greatest element nesting depth found among $nodes.
 :
 : Each element adds one level on top of the depth of its children; other
 : node kinds add nothing themselves.
 :
 : @param $nodes nodes to inspect
 : @return 0 for the empty sequence, otherwise the maximum depth
 :)
declare function ast:max-depth($nodes as node()*) as xs:integer {
  if (empty($nodes))
  then 0
  else max((
    for $node in $nodes
    return
      if ($node instance of element())
      then ast:max-depth($node/node()) + 1
      else ast:max-depth($node/node())
  ))
};

(:~
 : Nesting depth of the tree rooted at $root; delegates to ast:max-depth.
 :
 : @param $root root node
 : @return depth, counting an element root as level 1
 :)
declare function ast:max-node-depth($root as node()) as xs:integer {
  ast:max-depth($root)
};