347 lines
13 KiB
Java
347 lines
13 KiB
Java
|
/*
|
||
|
* Copyright 2011 The Closure Compiler Authors.
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package com.google.javascript.jscomp;
|
||
|
|
||
|
import com.google.javascript.rhino.Node;
|
||
|
|
||
|
public class PeepholeSimplifyRegExpTest extends CompilerTestCase {
|
||
|
|
||
|
public final void testWaysOfMatchingEmptyString() {
|
||
|
testSame("/(?:)/");
|
||
|
test("/(?:)/i", "/(?:)/"); // We can get rid of i flag when no letters.
|
||
|
test("/.{0}/i", "/(?:)/");
|
||
|
test("/[^\\0-\\uffff]{0}/", "/(?:)/");
|
||
|
// Cannot get rid of capturing groups.
|
||
|
testSame("/(){0}/");
|
||
|
}
|
||
|
|
||
|
public final void testCharsetOptimizations() {
|
||
|
testSame("/./");
|
||
|
test("/[\\0-\\uffff]/", "/[\\S\\s]/");
|
||
|
test("/[^\\0-\\uffff]/", "/(?!)/");
|
||
|
test("/[^\\0-\\x40\\x42-\\uffff]/", "/A/");
|
||
|
test("/[0-9a-fA-F]/i", "/[\\da-f]/i");
|
||
|
test("/[0-9a-zA-Z_$]/i", "/[\\w$]/");
|
||
|
test("/[()*+\\-,]/g", "/[(--]/g");
|
||
|
test("/[()*+\\-,z]/g", "/[(--z]/g");
|
||
|
test("/[\\-\\.\\/0]/g", "/[--0]/g");
|
||
|
test("/[\\-\\.\\/0\\n]/g", "/[\\n\\--0]/g");
|
||
|
test("/[\\[\\\\\\]]/g", "/[[-\\]]/g");
|
||
|
test("/[\\[\\\\\\]\\^]/g", "/[[-^]/g");
|
||
|
test("/[\\^`_]/g", "/[\\^-`]/g");
|
||
|
test("/[^\\^`_]/g", "/[^^-`]/g");
|
||
|
test("/^(?=[^a-z])/i", "/^(?=[\\W\\d_])/");
|
||
|
test("/^[^a-z0-9]/i", "/^[\\W_]/");
|
||
|
test("/[0-FA-Z]/", "/[0-Z]/");
|
||
|
test("/[0-9]/", "/\\d/");
|
||
|
test("/[^0-9]/", "/\\D/");
|
||
|
testSame("/\\D/");
|
||
|
test("/[_a-z0-9]/i", "/\\w/");
|
||
|
test("/[0-9_a-z]/i", "/\\w/");
|
||
|
test("/[_a-z0-9]/", "/[\\d_a-z]/");
|
||
|
test("/[_E-Za-f0-9]/i", "/\\w/");
|
||
|
test("/[E-Za-f]/i", "/[a-z]/i");
|
||
|
test("/[_E-Za-f0-9]/", "/[\\dE-Z_a-f]/");
|
||
|
// Test case normalization.
|
||
|
// U+00CA and U+00EA are E and e with ^ above
|
||
|
test("/[\\u00ca\\u00ea]/", "/[\\xca\\xea]/");
|
||
|
test("/[\\u00ca\\u00ea]/i", "/\\xca/i");
|
||
|
// IE (at least 6, 7, and 8) do not include \xA0 in \s so when an author
|
||
|
// explicitly includes it make sure it appears in the output.
|
||
|
testSame("/^[\\s\\xa0]*$/");
|
||
|
test("/^(?:\\s|\\xA0)*$/", "/^[\\s\\xa0]*$/");
|
||
|
}
|
||
|
|
||
|
public final void testCharsetFixup() {
|
||
|
testSame("/[a-z]/i");
|
||
|
// This is the case. The below produces no output in squarefree.
|
||
|
// (function () {
|
||
|
// // Runs to just before the letter 'a' and starts right after 'z'.
|
||
|
// var re = /[^\0-`{-\uffff]/i
|
||
|
// for (var i = 0; i < 0x10000; ++i) {
|
||
|
// var s = String.fromCharCode(i);
|
||
|
// if (re.test(s)) { print(s + ' : ' + s.charCodeAt(0).toString(16)); }
|
||
|
// }
|
||
|
// })()
|
||
|
test("/[^\\0-`{-\\uffff]/i", "/(?!)/");
|
||
|
// This looks a bit odd, but
|
||
|
// /[^a-z]/i is the same as all non-word characters, all digits, and _ and
|
||
|
// /[\W\d_]/ is the same length.
|
||
|
test("/[^a-z]/i", "/[\\W\\d_]/");
|
||
|
}
|
||
|
|
||
|
public final void testGroups() {
|
||
|
testSame("/foo(bar)baz/");
|
||
|
}
|
||
|
|
||
|
public final void testBackReferences() {
|
||
|
testSame("/foo(bar)baz(?:\\1|\\x01)boo/");
|
||
|
// But when there is no group to refer to, then the back-reference *is*
|
||
|
// the same as an octal escape.
|
||
|
test("/foo(?:bar)baz(?:\\1|\\x01)boo/", "/foobarbaz\\x01boo/");
|
||
|
// \\8 is never an octal escape. If there is no 8th group, then it
|
||
|
// is the literal character '8'
|
||
|
test("/foo(?:bar)baz(?:\\8|8)boo/", "/foobarbaz8boo/");
|
||
|
// \10 can be a capturing group.
|
||
|
test("/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)"
|
||
|
+ "\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/",
|
||
|
"/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)"
|
||
|
// \\12 does not match any group, so is treated as group 1 followed
|
||
|
// by literal 2.
|
||
|
+ "\\1(?:2)\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/");
|
||
|
// But \1 should not be emitted followed by a digit un-parenthesized.
|
||
|
test("/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)(?:\\1)0/",
|
||
|
"/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)\\1(?:0)/");
|
||
|
// \012 is never treated as a group even when there are 12 groups.
|
||
|
test("/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)(C?)"
|
||
|
+ "\\012\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/",
|
||
|
"/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)(C?)"
|
||
|
+ "\\n\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/");
|
||
|
}
|
||
|
|
||
|
public final void testSingleCharAlterations() {
|
||
|
test("/a|B|c|D/i", "/[a-d]/i");
|
||
|
test("/a|B|c|D/", "/[BDac]/");
|
||
|
test("/a|[Bc]|D/", "/[BDac]/");
|
||
|
test("/[aB]|[cD]/", "/[BDac]/");
|
||
|
test("/a|B|c|D|a|B/i", "/[a-d]/i"); // Duplicates.
|
||
|
test("/a|A|/i", "/a?/i");
|
||
|
}
|
||
|
|
||
|
public final void testAlterations() {
|
||
|
testSame("/foo|bar/");
|
||
|
test("/Foo|BAR/i", "/foo|bar/i");
|
||
|
test("/Foo||BAR/", "/Foo||BAR/");
|
||
|
test("/Foo|BAR|/", "/Foo|BAR|/");
|
||
|
}
|
||
|
|
||
|
public final void testNestedAlterations() {
|
||
|
test("/foo|bar|(?:baz|boo)|far/", "/foo|bar|baz|boo|far/");
|
||
|
}
|
||
|
|
||
|
public final void testEscapeSequencesAndNonLatinChars() {
|
||
|
test("/\u1234/i", "/\\u1234/");
|
||
|
testSame("/\\u1234/");
|
||
|
test("/\u00A0/", "/\\xa0/");
|
||
|
test("/\\u00A0/", "/\\xa0/");
|
||
|
test("/\\u00a0/", "/\\xa0/");
|
||
|
}
|
||
|
|
||
|
public final void testAnchors() {
|
||
|
// m changes the meaning of anchors which is useless if there are none.
|
||
|
testSame("/foo(?!$)/gm");
|
||
|
test("/./m", "/./");
|
||
|
test("/\\^/m", "/\\^/");
|
||
|
test("/[\\^]/m", "/\\^/");
|
||
|
testSame("/(^|foo)bar/");
|
||
|
testSame("/^.|.$/gm");
|
||
|
test("/foo(?=)$/m", "/foo$/m");
|
||
|
// We can get rid of the g when there are no capturing groups and the
|
||
|
// pattern is fully anchored.
|
||
|
test("/^foo$/g", "/^foo$/");
|
||
|
}
|
||
|
|
||
|
public final void testRepetitions() {
|
||
|
testSame("/a*/");
|
||
|
testSame("/a+/");
|
||
|
testSame("/a+?/");
|
||
|
testSame("/a?/");
|
||
|
testSame("/a{6}/");
|
||
|
testSame("/a{4,}/");
|
||
|
test("/a{3,}/",
|
||
|
"/aaa+/");
|
||
|
testSame("/a{4,6}/");
|
||
|
testSame("/a{4,6}?/");
|
||
|
test("/(?:a?)?/", "/a?/");
|
||
|
test("/(?:a?)*/", "/a*/");
|
||
|
test("/(?:a*)?/", "/a*/");
|
||
|
test("/a(?:a*)?/", "/a+/");
|
||
|
test("/(?:a{2,3}){3,4}/", "/a{6,12}/");
|
||
|
test("/a{2,3}a{3,4}/", "/a{5,7}/");
|
||
|
testSame("/a{5,7}b{5,6}/");
|
||
|
test("/a{2,3}b{3,4}/",
|
||
|
"/aaa?bbbb?/");
|
||
|
test("/a{3}b{3,4}/",
|
||
|
"/aaabbbb?/");
|
||
|
testSame("/[a-z]{1,2}/");
|
||
|
test("/\\d{1,2}/",
|
||
|
"/\\d\\d?/");
|
||
|
test("/a*a*/", "/a*/");
|
||
|
test("/a+a+/", "/aa+/");
|
||
|
test("/a+a*/", "/a+/");
|
||
|
// We don't conflate literal curly brackets with repetitions.
|
||
|
testSame("/a\\{3,1}/");
|
||
|
test("/a(?:{3,1})/", "/a\\{3,1}/");
|
||
|
test("/a{3\\,1}/", "/a\\{3,1}/");
|
||
|
testSame("/a\\{3}/");
|
||
|
testSame("/a\\{3,}/");
|
||
|
testSame("/a\\{1,3}/");
|
||
|
// We don't over-escape curly brackets.
|
||
|
testSame("/a{/");
|
||
|
testSame("/a{}/");
|
||
|
testSame("/a{x}/");
|
||
|
testSame("/a{-1}/");
|
||
|
testSame("/a{,3}/");
|
||
|
testSame("/{{[a-z]+}}/");
|
||
|
testSame("/{\\{0}}/");
|
||
|
testSame("/{\\{0?}}/");
|
||
|
}
|
||
|
|
||
|
public final void testMoreCharsets() {
|
||
|
test("var a = /[\\x00\\x22\\x26\\x27\\x3c\\x3e]/g",
|
||
|
"var a = /[\\0\"&'<>]/g");
|
||
|
test("var b = /[\\x00\\x22\\x27\\x3c\\x3e]/g",
|
||
|
"var b = /[\\0\"'<>]/g");
|
||
|
test("var c = /[\\x00\\x09-\\x0d \\x22\\x26\\x27\\x2d\\/\\x3c-\\x3e`"
|
||
|
+ "\\x85\\xa0\\u2028\\u2029]/g",
|
||
|
"var c = /[\\0\\t-\\r \"&'/<->`\\x85\\xa0\\u2028\\u2029-]/g");
|
||
|
test("var d = /[\\x00\\x09-\\x0d \\x22\\x27\\x2d\\/\\x3c-\\x3e`"
|
||
|
+ "\\x85\\xa0\\u2028\\u2029]/g",
|
||
|
"var d = /[\\0\\t-\\r \"'/<->`\\x85\\xa0\\u2028\\u2029-]/g");
|
||
|
test("var e = /[\\x00\\x08-\\x0d\\x22\\x26\\x27\\/\\x3c-\\x3e\\\\"
|
||
|
+ "\\x85\\u2028\\u2029]/g",
|
||
|
"var e = /[\\0\\b-\\r\"&'/<->\\\\\\x85\\u2028\\u2029]/g");
|
||
|
test("var f = /[\\x00\\x08-\\x0d\\x22\\x24\\x26-\\/\\x3a\\x3c-\\x3f"
|
||
|
+ "\\x5b-\\x5e\\x7b-\\x7d\\x85\\u2028\\u2029]/g",
|
||
|
"var f = /[\\0\\b-\\r\"$&-/:<-?[-^{-}\\x85\\u2028\\u2029]/g");
|
||
|
test("var g = /[\\x00\\x08-\\x0d\\x22\\x26-\\x2a\\/\\x3a-\\x3e@\\\\"
|
||
|
+ "\\x7b\\x7d\\x85\\xa0\\u2028\\u2029]/g",
|
||
|
"var g = /[\\0\\b-\\r\"&-*/:->@\\\\{}\\x85\\xa0\\u2028\\u2029]/g");
|
||
|
test("var h = /^(?!-*(?:expression|(?:moz-)?binding))(?:[.#]?-?"
|
||
|
+ "(?:[_a-z0-9][_a-z0-9-]*)(?:-[_a-z][_a-z0-9-]*)*-?|-?"
|
||
|
+ "(?:[0-9]+(?:\\.[0-9]*)?|\\.[0-9])(?:[a-z]{1,2}|%)?|!important|)$/i",
|
||
|
"var h = /^(?!-*(?:expression|(?:moz-)?binding))(?:[#.]?-?"
|
||
|
+ "\\w[\\w-]*(?:-[_a-z][\\w-]*)*-?|-?"
|
||
|
+ "(?:\\d+(?:\\.\\d*)?|\\.\\d)(?:[a-z]{1,2}|%)?|!important|)$/i");
|
||
|
test("var i = /^(?:(?:https?|mailto):|[^&:\\/?#]*(?:[\\/?#]|$))/i",
|
||
|
"var i = /^(?:(?:https?|mailto):|[^#&/:?]*(?:[#/?]|$))/i");
|
||
|
test("var j = /^(?!style|on|action|archive|background|cite|classid"
|
||
|
+ "|codebase|data|dsync|href|longdesc|src|usemap)(?:[a-z0-9_$:-]*"
|
||
|
+ "|dir=(?:ltr|rtl))$/i",
|
||
|
"var j = /^(?!style|on|action|archive|background|cite|classid"
|
||
|
+ "|codebase|data|dsync|href|longdesc|src|usemap)(?:[\\w$:-]*"
|
||
|
+ "|dir=(?:ltr|rtl))$/i");
|
||
|
test("var k = /^(?!script|style|title|textarea|xmp|no)[a-z0-9_$:-]*$/i",
|
||
|
"var k = /^(?!script|style|title|textarea|xmp|no)[\\w$:-]*$/i");
|
||
|
test("var l = /<(?:!|\\/?[a-z])(?:[^>'\"]|\"[^\"]*\"|'[^']*')*>/gi",
|
||
|
"var l = /<(?:!|\\/?[a-z])(?:[^\"'>]|\"[^\"]*\"|'[^']*')*>/gi");
|
||
|
}
|
||
|
|
||
|
public final void testMoreRegularExpression() {
|
||
|
testSame("/\"/");
|
||
|
testSame("/'/");
|
||
|
test("/(?:[^<\\/\"'\\s\\\\]|<(?!\\/script))+/i",
|
||
|
"/(?:[^\\s\"'/<\\\\]|<(?!\\/script))+/i");
|
||
|
testSame("/-->/");
|
||
|
testSame("/<!--/");
|
||
|
testSame("/<\\/(\\w+)\\b/");
|
||
|
testSame("/<\\/?/");
|
||
|
test("/<script(?=[\\s>\\/]|$)/i", "/<script(?=[\\s/>]|$)/i");
|
||
|
test("/<style(?=[\\s>\\/]|$)/i", "/<style(?=[\\s/>]|$)/i");
|
||
|
test("/<textarea(?=[\\s>\\/]|$)/i", "/<textarea(?=[\\s/>]|$)/i");
|
||
|
test("/<title(?=[\\s>\\/]|$)/i", "/<title(?=[\\s/>]|$)/i");
|
||
|
test("/<xmp(?=[\\s>\\/]|$)/i", "/<xmp(?=[\\s/>]|$)/i");
|
||
|
testSame("/[\"']/");
|
||
|
test("/[\\\\)\\s]/", "/[\\s)\\\\]/");
|
||
|
test("/[\\f\\r\\n\\u2028\\u2029]/", "/[\\n\\f\\r\\u2028\\u2029]/");
|
||
|
test("/[\\n\\r\\f]/", "/[\\n\\f\\r]/");
|
||
|
testSame("/\\*\\//");
|
||
|
testSame("/\\//");
|
||
|
testSame("/\\/\\*/");
|
||
|
testSame("/\\/\\//");
|
||
|
testSame("/\\\\(?:\\r\\n?|[\\n\\f\"])/");
|
||
|
testSame("/\\\\(?:\\r\\n?|[\\n\\f'])/");
|
||
|
testSame("/\\burl\\s*\\(\\s*([\"']?)/i");
|
||
|
testSame("/\\s+/");
|
||
|
test("/^(?:[^'\\\\\\n\\r\\u2028\\u2029<]|\\\\(?:\\r\\n?|[^\\r<]"
|
||
|
+ "|<(?!\\/script))|<(?!\\/script))/i",
|
||
|
"/^(?:[^\\n\\r'<\\\\\\u2028\\u2029]|\\\\(?:\\r\\n?|[^\\r<]"
|
||
|
+ "|<(?!\\/script))|<(?!\\/script))/i");
|
||
|
test("/^(?:[^\\\"\\\\\\n\\r\\u2028\\u2029<]|\\\\(?:\\r\\n?"
|
||
|
+ "|[^\\r<]|<(?!\\/script))|<(?!\\/script))/i",
|
||
|
"/^(?:[^\\n\\r\"<\\\\\\u2028\\u2029]|\\\\(?:\\r\\n?"
|
||
|
+ "|[^\\r<]|<(?!\\/script))|<(?!\\/script))/i");
|
||
|
test("/^(?:[^\\[\\\\\\/<\\n\\r\\u2028\\u2029]|\\\\[^\\n\\r\\u2028\\u2029]"
|
||
|
+ "|\\\\?<(?!\\/script)|\\[(?:[^\\]\\\\<\\n\\r\\u2028\\u2029]|"
|
||
|
+ "\\\\(?:[^\\n\\r\\u2028\\u2029]))*|\\\\?<(?!\\/script)\\])/i",
|
||
|
"/^(?:[^\\n\\r/<[\\\\\\u2028\\u2029]|\\\\."
|
||
|
+ "|\\\\?<(?!\\/script)|\\[(?:[^\\n\\r<\\\\\\]\\u2028\\u2029]|"
|
||
|
+ "\\\\.)*|\\\\?<(?!\\/script)])/i");
|
||
|
testSame("/^(?=>|\\s+[\\w-]+\\s*=)/");
|
||
|
test("/^(?=[\\/\\s>])/", "/^(?=[\\s/>])/");
|
||
|
test("/^(?=[^\"'\\s>])/", "/^(?=[^\\s\"'>])/");
|
||
|
testSame("/^/");
|
||
|
testSame("/^[^<]+/");
|
||
|
test("/^[a-z0-9:-]*(?:[a-z0-9]|$)/i", "/^[\\d:a-z-]*(?:[^\\W_]|$)/i");
|
||
|
testSame("/^[a-z]+/i");
|
||
|
testSame("/^\\s*\"/");
|
||
|
testSame("/^\\s*'/");
|
||
|
testSame("/^\\s*([a-z][\\w-]*)/i");
|
||
|
testSame("/^\\s*=/");
|
||
|
testSame("/^\\s*\\/?>/");
|
||
|
testSame("/^\\s+$/");
|
||
|
testSame("/^\\s+/");
|
||
|
}
|
||
|
|
||
|
public final void testPrecedence() {
|
||
|
// Repetition binds more tightly than concatenation.
|
||
|
testSame("/ab?/");
|
||
|
testSame("/(?:ab)?/");
|
||
|
// Concatenation bind more tightly than alterations.
|
||
|
testSame("/foo|bar/");
|
||
|
testSame("/f(?:oo|ba)r/");
|
||
|
}
|
||
|
|
||
|
public final void testMalformedRegularExpressions() {
|
||
|
test(
|
||
|
"/(?<!foo)/", "/(?<!foo)/", // Lookbehind not valid in ES.
|
||
|
null, // No error.
|
||
|
CheckRegExp.MALFORMED_REGEXP); // Warning.
|
||
|
test(
|
||
|
"/(/", "/(/",
|
||
|
null, // No error.
|
||
|
CheckRegExp.MALFORMED_REGEXP); // Warning.
|
||
|
test(
|
||
|
"/)/", "/)/",
|
||
|
null, // No error.
|
||
|
CheckRegExp.MALFORMED_REGEXP); // Warning.
|
||
|
test(
|
||
|
"/\\uabc/", "/\\uabc/",
|
||
|
null, // No error.
|
||
|
CheckRegExp.MALFORMED_REGEXP); // Warning.
|
||
|
test(
|
||
|
"/\\uabcg/", "/\\uabcg/",
|
||
|
null, // No error.
|
||
|
CheckRegExp.MALFORMED_REGEXP); // Warning.
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
protected CompilerPass getProcessor(Compiler compiler) {
|
||
|
final CompilerPass simplifier = new PeepholeOptimizationsPass(
|
||
|
compiler, new PeepholeSimplifyRegExp());
|
||
|
final CompilerPass checker = new CheckRegExp(compiler);
|
||
|
|
||
|
return new CompilerPass() {
|
||
|
@Override
|
||
|
public void process(Node externs, Node root) {
|
||
|
checker.process(externs, root);
|
||
|
simplifier.process(externs, root);
|
||
|
}
|
||
|
};
|
||
|
}
|
||
|
}
|