vscode-xml/src/formatting/formatters/v2-xml-formatter.ts
2020-07-08 23:08:56 -04:00

356 lines
12 KiB
TypeScript

import { XmlFormatter } from "../xml-formatter";
import { XmlFormattingOptions } from "../xml-formatting-options";
import { ClassicXmlFormatter } from "./classic-xml-formatter";
const MagicalStringOfWonders = "~::~MAAAGIC~::~";
/* tslint:disable no-use-before-declare */
export class V2XmlFormatter implements XmlFormatter {
formatXml(xml: string, options: XmlFormattingOptions): string {
// this replaces all "<" brackets inside of comments and CDATA to a magical string
// so the following minification steps don't mess with comment and CDATA formatting
xml = this._sanitizeCommentsAndCDATA(xml);
// remove whitespace from between tags, except for line breaks
xml = xml.replace(/>\s{0,}</g, (match: string) => {
return match.replace(/[^\S\r\n]/g, "");
});
// do some light minification to get rid of insignificant whitespace
xml = xml.replace(/"\s+(?=[^\s]+=)/g, "\" "); // spaces between attributes
xml = xml.replace(/"\s+(?=>)/g, "\""); // spaces between the last attribute and tag close (>)
xml = xml.replace(/"\s+(?=\/>)/g, "\" "); // spaces between the last attribute and tag close (/>)
xml = xml.replace(/(?!<!\[CDATA\[)[^ <>="]\s+[^ <>="]+=(?![^<]*?\]\]>)/g, (match: string) => { // spaces between the node name and the first attribute
return match.replace(/\s+/g, " ");
});
// the coast is clear - we can drop those "<" brackets back in
xml = this._unsanitizeCommentsAndCDATA(xml);
let output = "";
let indentLevel = options.initialIndentLevel || 0;
let attributeQuote = "";
let lineBreakSpree = false;
let lastWordCharacter: string | undefined;
let inMixedContent = false;
const locationHistory: Location[] = [Location.Text];
function isLastNonTextLocation(loc: Location): boolean {
for (let i = (locationHistory.length - 1); i >= 0; i--) {
if (locationHistory[i] !== Location.Text) {
return (loc === locationHistory[i]);
}
}
return false;
}
function isLocation(loc: Location): boolean {
return loc === locationHistory[locationHistory.length - 1];
}
function refreshMixedContentFlag(): void {
inMixedContent = (isLastNonTextLocation(Location.StartTag) || isLastNonTextLocation(Location.EndTag)) && lastWordCharacter !== undefined;
}
function setLocation(loc: Location): void {
if (loc === Location.Text) {
lastWordCharacter = undefined;
}
locationHistory.push(loc);
}
// NOTE: all "exiting" checks should appear after their associated "entering" checks
for (let i = 0; i < xml.length; i++) {
const cc = xml[i];
const nc = xml.charAt(i + 1);
const nnc = xml.charAt(i + 2);
const pc = xml.charAt(i - 1);
const ppc = xml.charAt(i - 2);
// entering CData
if (isLocation(Location.Text) && cc === "<" && nc === "!" && nnc === "[") {
if (pc === ">" && ppc !== "/") {
output += "<";
}
else {
output += `${this._getIndent(options, indentLevel)}<`;
}
setLocation(Location.CData);
}
// exiting CData
else if (isLocation(Location.CData) && cc === "]" && nc === "]" && nnc === ">") {
output += "]]>";
i += 2;
setLocation(Location.Text);
}
// entering Comment
else if (isLocation(Location.Text) && cc === "<" && nc === "!" && nnc === "-") {
output += `${this._getIndent(options, indentLevel)}<`;
setLocation(Location.Comment);
}
// exiting Comment
else if (isLocation(Location.Comment) && cc === "-" && nc === "-" && nnc === ">") {
output += "-->";
i += 2;
setLocation(Location.Text);
}
// entering SpecialTag
else if (isLocation(Location.Text) && cc === "<" && (nc === "!" || nc === "?")) {
output += `${this._getIndent(options, indentLevel)}<`;
setLocation(Location.SpecialTag);
}
// exiting SpecialTag
else if (isLocation(Location.SpecialTag) && cc === ">") {
output += `>`;
setLocation(Location.Text);
}
// entering StartTag.StartTagName
else if (isLocation(Location.Text) && cc === "<" && ["/", "!"].indexOf(nc) === -1) {
refreshMixedContentFlag();
// if this occurs after another tag, prepend a line break
// but do not add one if the previous tag was self-closing (it already adds its own)
if (pc === ">" && ppc !== "/" && !inMixedContent) {
output += `${options.newLine}${this._getIndent(options, indentLevel)}<`;
}
else if (!inMixedContent) {
// removing trailing non-breaking whitespace here prevents endless indentations (issue #193)
output = this._removeTrailingNonBreakingWhitespace(output);
output += `${this._getIndent(options, indentLevel)}<`;
}
else {
output += "<";
indentLevel--;
}
indentLevel++;
setLocation(Location.StartTagName);
}
// exiting StartTag.StartTagName, enter StartTag
else if (isLocation(Location.StartTagName) && cc === " ") {
output += " ";
setLocation(Location.StartTag);
}
// entering StartTag.Attribute
else if (isLocation(Location.StartTag) && [" ", "/", ">"].indexOf(cc) === -1) {
if (locationHistory[locationHistory.length - 2] === Location.AttributeValue
&& ((options.splitXmlnsOnFormat
&& xml.substr(i, 5).toLowerCase() === "xmlns")
|| options.splitAttributesOnFormat)) {
// trim the end of output here to ensure there is no trailing whitespace (issue #288)
output = this._removeTrailingNonBreakingWhitespace(output);
output += `${options.newLine}${this._getIndent(options, indentLevel)}`;
}
output += cc;
setLocation(Location.Attribute);
}
// entering StartTag.Attribute.AttributeValue
else if (isLocation(Location.Attribute) && (cc === "\"" || cc === "'")) {
output += cc;
setLocation(Location.AttributeValue);
attributeQuote = cc;
}
// exiting StartTag.Attribute.AttributeValue, entering StartTag
else if (isLocation(Location.AttributeValue) && cc === attributeQuote) {
output += cc;
setLocation(Location.StartTag);
attributeQuote = undefined;
}
// approaching the end of a self-closing tag where there was no whitespace (issue #149)
else if ((isLocation(Location.StartTag) || isLocation(Location.StartTagName))
&& cc === "/"
&& pc !== " "
&& options.enforcePrettySelfClosingTagOnFormat) {
output += " /";
}
// exiting StartTag or StartTag.StartTagName, entering Text
else if ((isLocation(Location.StartTag) || isLocation(Location.StartTagName)) && cc === ">") {
// if this was a self-closing tag, we need to decrement the indent level and add a newLine
if (pc === "/") {
indentLevel--;
output += ">";
// only add a newline here if one doesn't already exist (issue #147)
if (nc !== "\r" && nc !== "\n") {
output += options.newLine;
}
}
else {
output += ">";
}
// don't go directly from StartTagName to Text; go through StartTag first
if (isLocation(Location.StartTagName)) {
setLocation(Location.StartTag);
}
setLocation(Location.Text);
}
// entering EndTag
else if (isLocation(Location.Text) && cc === "<" && nc === "/") {
if (!inMixedContent) {
indentLevel--;
}
refreshMixedContentFlag();
// if the end tag immediately follows a line break, just add an indentation
// if the end tag immediately follows another end tag or a self-closing tag (issue #185), add a line break and indent
// otherwise, this should be treated as a same-line end tag(ex. <element>text</element>)
if ((pc === "\n" || lineBreakSpree) && !inMixedContent) {
// removing trailing non-breaking whitespace here prevents endless indentations (issue #193)
output = this._removeTrailingNonBreakingWhitespace(output);
output += `${this._getIndent(options, indentLevel)}<`;
lineBreakSpree = false;
}
else if (isLastNonTextLocation(Location.EndTag) && !inMixedContent) {
output += `${options.newLine}${this._getIndent(options, indentLevel)}<`;
}
else if (pc === ">" && ppc === "/" && !inMixedContent) {
output += `${this._getIndent(options, indentLevel)}<`;
}
else {
output += "<";
}
setLocation(Location.EndTag);
}
// exiting EndTag, entering Text
else if (isLocation(Location.EndTag) && cc === ">") {
output += ">";
setLocation(Location.Text);
inMixedContent = false;
}
// Text
else {
if (cc === "\n") {
lineBreakSpree = true;
lastWordCharacter = undefined;
}
else if (lineBreakSpree && /\S/.test(cc)) {
lineBreakSpree = false;
}
if (/[\w\d]/.test(cc)) {
lastWordCharacter = cc;
}
output += cc;
}
}
return output;
}
minifyXml(xml: string, options: XmlFormattingOptions): string {
return new ClassicXmlFormatter().minifyXml(xml, options);
}
private _getIndent(options: XmlFormattingOptions, indentLevel: number): string {
return ((options.editorOptions.insertSpaces) ? " ".repeat(options.editorOptions.tabSize) : "\t").repeat(Math.max(indentLevel, 0));
}
private _removeTrailingNonBreakingWhitespace(text: string): string {
return text.replace(/[^\r\n\S]+$/, "");
}
private _sanitizeCommentsAndCDATA(xml: string): string {
let output = "";
let inCommentOrCDATA = false;
for (let i = 0; i < xml.length; i++) {
const cc = xml[i];
const nc = xml.charAt(i + 1);
const nnc = xml.charAt(i + 2);
const pc = xml.charAt(i - 1);
if (!inCommentOrCDATA && cc === "<" && nc === "!" && (nnc === "-" || nnc === "[")) {
inCommentOrCDATA = true;
output += (nnc === "-") ? "<!--" : "<![CDATA[";
i += (nnc === "-") ? 3 : 8;
}
else if (inCommentOrCDATA && cc === "<") {
output += MagicalStringOfWonders;
}
else if (inCommentOrCDATA && (cc === "-" && nc === "-" && nnc === ">") || (cc === "]" && nc === "]" && nnc === ">")) {
inCommentOrCDATA = false;
output += (cc === "-") ? "-->" : "]]>";
i += 2;
}
else {
output += cc;
}
}
return output;
}
private _unsanitizeCommentsAndCDATA(xml: string): string {
return xml.replace(new RegExp(MagicalStringOfWonders, "g"), "<");
}
}
enum Location {
Attribute,
AttributeValue,
CData,
Comment,
EndTag,
SpecialTag,
StartTag,
StartTagName,
Text
}