vscode-basex/src/services/XmlFormatter.ts
Josh Johnson fd5749a49a Fix Eager Line Break Stripping
This change ensures line breaks are not stripped outside of CDATA nodes
if the line break is not preceded or followed by another whitespace
character.

Fixes #92
2017-06-20 22:50:21 -04:00

194 lines
No EOL
7.1 KiB
TypeScript

'use strict';
// Based on pretty-data (https://github.com/vkiryukhin/pretty-data)
export class XmlFormatter {
constructor(options?: IXmlFormatterOptions) {
options = options || {};
if (typeof options.preferSpaces === 'undefined') {
options.preferSpaces = false;
}
if (typeof options.splitNamespaces === 'undefined') {
options.splitNamespaces = true;
}
options.tabSize = options.tabSize || 4;
options.newLine = options.newLine || '\n';
this.newLine = options.newLine || '\n';
this.indentPattern = (options.preferSpaces) ? ' '.repeat(options.tabSize) : '\t';
this.splitNamespaces = options.splitNamespaces;
}
newLine: string;
indentPattern: string;
splitNamespaces: boolean;
format(xml: string): string {
xml = this.minify(xml, false);
xml = xml.replace(/</g, '~::~<');
if (this.splitNamespaces) {
xml = xml
.replace(/xmlns\:/g, '~::~xmlns:')
.replace(/xmlns\=/g, '~::~xmlns=');
}
let parts: string[] = xml.split('~::~');
let inComment: boolean = false;
let level: number = 0;
let output: string = '';
for (let i = 0; i < parts.length; i++) {
// <!
if (parts[i].search(/<!/) > -1) {
output += this._getIndent(level, parts[i]);
inComment = true;
// end <!
if (parts[i].search(/-->/) > -1 || parts[i].search(/\]>/) > -1 || parts[i].search(/!DOCTYPE/) > -1) {
inComment = false;
}
}
// end <!
else if (parts[i].search(/-->/) > -1 || parts[i].search(/\]>/) > -1) {
output += parts[i];
inComment = false;
}
// <elm></elm>
else if (/^<(\w|:)/.test(parts[i - 1]) && /^<\/(\w|:)/.test(parts[i])
&& /^<[\w:\-\.\,]+/.exec(parts[i - 1])[0] == /^<\/[\w:\-\.\,]+/.exec(parts[i])[0].replace('/', '')) {
output += parts[i];
if (!inComment) level--;
}
// <elm>
else if (parts[i].search(/<(\w|:)/) > -1 && parts[i].search(/<\//) == -1 && parts[i].search(/\/>/) == -1) {
output = (!inComment) ? output += this._getIndent(level++, parts[i]) : output += parts[i];
}
// <elm>...</elm>
else if (parts[i].search(/<(\w|:)/) > -1 && parts[i].search(/<\//) > -1) {
output = (!inComment) ? output += this._getIndent(level, parts[i]) : output += parts[i];
}
// </elm>
else if (parts[i].search(/<\//) > -1) {
output = (!inComment) ? output += this._getIndent(--level, parts[i]) : output += parts[i];
}
// <elm />
else if (parts[i].search(/\/>/) > -1 && (!this.splitNamespaces || parts[i].search(/xmlns\:/) == -1)) {
output = (!inComment) ? output += this._getIndent(level, parts[i]) : output += parts[i];
}
// xmlns />
else if (parts[i].search(/\/>/) > -1 && parts[i].search(/xmlns\:/) > -1 && this.splitNamespaces) {
output = (!inComment) ? output += this._getIndent(level--, parts[i]) : output += parts[i];
}
// <?xml ... ?>
else if (parts[i].search(/<\?/) > -1) {
output += this._getIndent(level, parts[i]);
}
// xmlns
else if (parts[i].search(/xmlns\:/) > -1 || parts[i].search(/xmlns\=/) > -1) {
output += this._getIndent(level, parts[i]);
}
else {
output += parts[i];
}
}
// remove leading newline
if (output[0] == this.newLine) {
output = output.slice(1);
}
else if (output.substring(0, 1) == this.newLine) {
output = output.slice(2);
}
return output;
}
minify(xml: string, removeComments?: boolean): string {
if (typeof removeComments === 'undefined') {
removeComments = false;
}
xml = this._stripLineBreaks(xml); // all line breaks outside of CDATA elements
xml = (removeComments) ? xml.replace(/\<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)\>/g, '') : xml;
xml = xml.replace(/>\s{0,}</g, '><'); // insignificant whitespace between tags
xml = xml.replace(/"\s+(?=[^\s]+=)/g, '" '); // spaces between attributes
xml = xml.replace(/"\s+(?=>)/g, '"'); // spaces between the last attribute and tag close (>)
xml = xml.replace(/"\s+(?=\/>)/g, '" '); // spaces between the last attribute and tag close (/>)
xml = xml.replace(/[^ <>="]\s+[^ <>="]+=/g, (match: string) => { // spaces between the node name and the first attribute
return match.replace(/\s+/g, ' ');
});
return xml;
}
private _getIndent(level: number, trailingValue?: string): string {
trailingValue = trailingValue || '';
return `${this.newLine}${this.indentPattern.repeat(level)}${trailingValue}`;
}
private _stripLineBreaks(xml: string): string {
let output: string = '';
let inTag: boolean = false;
let inTagName: boolean = false;
let inCdata: boolean = false;
let inAttribute: boolean = false;
for (let i = 0; i < xml.length; i++) {
let char: string = xml.charAt(i);
let prev: string = xml.charAt(i - 1);
let next: string = xml.charAt(i + 1);
if (char == '!' && (xml.substr(i, 8) == '![CDATA[' || xml.substr(i, 3) == '!--')) {
inCdata = true;
}
else if (char == ']' && (xml.substr(i, 3) == ']]>')) {
inCdata = false;
}
else if (char == '-' && (xml.substr(i, 3) == '-->')) {
inCdata = false;
}
else if (char.search(/[\r\n]/g) > -1 && !inCdata) {
if (/\r/.test(char) && /\S|\r|\n/.test(prev) && /\S|\r|\n/.test(xml.charAt(i + this.newLine.length))) {
output += char;
}
else if (/\n/.test(char) && /\S|\r|\n/.test(xml.charAt(i - this.newLine.length)) && /\S|\r|\n/.test(next)) {
output += char;
}
continue;
}
output += char;
}
return output;
}
}
export interface IXmlFormatterOptions {
preferSpaces?: boolean;
tabSize?: number;
newLine?: string;
splitNamespaces?: boolean;
}