This repository has been archived on 2023-06-18. You can view files and clone it, but cannot push or open issues or pull requests.
ima02/resources/defects4j-checkout-closure-1f/test/com/google/javascript/jscomp/PeepholeSimplifyRegExpTest.java

347 lines
13 KiB
Java
Raw Permalink Normal View History

2023-04-25 11:33:41 +00:00
/*
* Copyright 2011 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.javascript.jscomp;
import com.google.javascript.rhino.Node;
public class PeepholeSimplifyRegExpTest extends CompilerTestCase {
public final void testWaysOfMatchingEmptyString() {
testSame("/(?:)/");
test("/(?:)/i", "/(?:)/"); // We can get rid of i flag when no letters.
test("/.{0}/i", "/(?:)/");
test("/[^\\0-\\uffff]{0}/", "/(?:)/");
// Cannot get rid of capturing groups.
testSame("/(){0}/");
}
public final void testCharsetOptimizations() {
testSame("/./");
test("/[\\0-\\uffff]/", "/[\\S\\s]/");
test("/[^\\0-\\uffff]/", "/(?!)/");
test("/[^\\0-\\x40\\x42-\\uffff]/", "/A/");
test("/[0-9a-fA-F]/i", "/[\\da-f]/i");
test("/[0-9a-zA-Z_$]/i", "/[\\w$]/");
test("/[()*+\\-,]/g", "/[(--]/g");
test("/[()*+\\-,z]/g", "/[(--z]/g");
test("/[\\-\\.\\/0]/g", "/[--0]/g");
test("/[\\-\\.\\/0\\n]/g", "/[\\n\\--0]/g");
test("/[\\[\\\\\\]]/g", "/[[-\\]]/g");
test("/[\\[\\\\\\]\\^]/g", "/[[-^]/g");
test("/[\\^`_]/g", "/[\\^-`]/g");
test("/[^\\^`_]/g", "/[^^-`]/g");
test("/^(?=[^a-z])/i", "/^(?=[\\W\\d_])/");
test("/^[^a-z0-9]/i", "/^[\\W_]/");
test("/[0-FA-Z]/", "/[0-Z]/");
test("/[0-9]/", "/\\d/");
test("/[^0-9]/", "/\\D/");
testSame("/\\D/");
test("/[_a-z0-9]/i", "/\\w/");
test("/[0-9_a-z]/i", "/\\w/");
test("/[_a-z0-9]/", "/[\\d_a-z]/");
test("/[_E-Za-f0-9]/i", "/\\w/");
test("/[E-Za-f]/i", "/[a-z]/i");
test("/[_E-Za-f0-9]/", "/[\\dE-Z_a-f]/");
// Test case normalization.
// U+00CA and U+00EA are E and e with ^ above
test("/[\\u00ca\\u00ea]/", "/[\\xca\\xea]/");
test("/[\\u00ca\\u00ea]/i", "/\\xca/i");
// IE (at least 6, 7, and 8) do not include \xA0 in \s so when an author
// explicitly includes it make sure it appears in the output.
testSame("/^[\\s\\xa0]*$/");
test("/^(?:\\s|\\xA0)*$/", "/^[\\s\\xa0]*$/");
}
public final void testCharsetFixup() {
testSame("/[a-z]/i");
// This is the case. The below produces no output in squarefree.
// (function () {
// // Runs to just before the letter 'a' and starts right after 'z'.
// var re = /[^\0-`{-\uffff]/i
// for (var i = 0; i < 0x10000; ++i) {
// var s = String.fromCharCode(i);
// if (re.test(s)) { print(s + ' : ' + s.charCodeAt(0).toString(16)); }
// }
// })()
test("/[^\\0-`{-\\uffff]/i", "/(?!)/");
// This looks a bit odd, but
// /[^a-z]/i is the same as all non-word characters, all digits, and _ and
// /[\W\d_]/ is the same length.
test("/[^a-z]/i", "/[\\W\\d_]/");
}
public final void testGroups() {
testSame("/foo(bar)baz/");
}
public final void testBackReferences() {
testSame("/foo(bar)baz(?:\\1|\\x01)boo/");
// But when there is no group to refer to, then the back-reference *is*
// the same as an octal escape.
test("/foo(?:bar)baz(?:\\1|\\x01)boo/", "/foobarbaz\\x01boo/");
// \\8 is never an octal escape. If there is no 8th group, then it
// is the literal character '8'
test("/foo(?:bar)baz(?:\\8|8)boo/", "/foobarbaz8boo/");
// \10 can be a capturing group.
test("/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)"
+ "\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/",
"/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)"
// \\12 does not match any group, so is treated as group 1 followed
// by literal 2.
+ "\\1(?:2)\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/");
// But \1 should not be emitted followed by a digit un-parenthesized.
test("/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)(?:\\1)0/",
"/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)\\1(?:0)/");
// \012 is never treated as a group even when there are 12 groups.
test("/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)(C?)"
+ "\\012\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/",
"/(1?)(2?)(3?)(4?)(5?)(6?)(7?)(8?)(9?)(A?)(B?)(C?)"
+ "\\n\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1\\0/");
}
public final void testSingleCharAlterations() {
test("/a|B|c|D/i", "/[a-d]/i");
test("/a|B|c|D/", "/[BDac]/");
test("/a|[Bc]|D/", "/[BDac]/");
test("/[aB]|[cD]/", "/[BDac]/");
test("/a|B|c|D|a|B/i", "/[a-d]/i"); // Duplicates.
test("/a|A|/i", "/a?/i");
}
public final void testAlterations() {
testSame("/foo|bar/");
test("/Foo|BAR/i", "/foo|bar/i");
test("/Foo||BAR/", "/Foo||BAR/");
test("/Foo|BAR|/", "/Foo|BAR|/");
}
public final void testNestedAlterations() {
test("/foo|bar|(?:baz|boo)|far/", "/foo|bar|baz|boo|far/");
}
public final void testEscapeSequencesAndNonLatinChars() {
test("/\u1234/i", "/\\u1234/");
testSame("/\\u1234/");
test("/\u00A0/", "/\\xa0/");
test("/\\u00A0/", "/\\xa0/");
test("/\\u00a0/", "/\\xa0/");
}
public final void testAnchors() {
// m changes the meaning of anchors which is useless if there are none.
testSame("/foo(?!$)/gm");
test("/./m", "/./");
test("/\\^/m", "/\\^/");
test("/[\\^]/m", "/\\^/");
testSame("/(^|foo)bar/");
testSame("/^.|.$/gm");
test("/foo(?=)$/m", "/foo$/m");
// We can get rid of the g when there are no capturing groups and the
// pattern is fully anchored.
test("/^foo$/g", "/^foo$/");
}
public final void testRepetitions() {
testSame("/a*/");
testSame("/a+/");
testSame("/a+?/");
testSame("/a?/");
testSame("/a{6}/");
testSame("/a{4,}/");
test("/a{3,}/",
"/aaa+/");
testSame("/a{4,6}/");
testSame("/a{4,6}?/");
test("/(?:a?)?/", "/a?/");
test("/(?:a?)*/", "/a*/");
test("/(?:a*)?/", "/a*/");
test("/a(?:a*)?/", "/a+/");
test("/(?:a{2,3}){3,4}/", "/a{6,12}/");
test("/a{2,3}a{3,4}/", "/a{5,7}/");
testSame("/a{5,7}b{5,6}/");
test("/a{2,3}b{3,4}/",
"/aaa?bbbb?/");
test("/a{3}b{3,4}/",
"/aaabbbb?/");
testSame("/[a-z]{1,2}/");
test("/\\d{1,2}/",
"/\\d\\d?/");
test("/a*a*/", "/a*/");
test("/a+a+/", "/aa+/");
test("/a+a*/", "/a+/");
// We don't conflate literal curly brackets with repetitions.
testSame("/a\\{3,1}/");
test("/a(?:{3,1})/", "/a\\{3,1}/");
test("/a{3\\,1}/", "/a\\{3,1}/");
testSame("/a\\{3}/");
testSame("/a\\{3,}/");
testSame("/a\\{1,3}/");
// We don't over-escape curly brackets.
testSame("/a{/");
testSame("/a{}/");
testSame("/a{x}/");
testSame("/a{-1}/");
testSame("/a{,3}/");
testSame("/{{[a-z]+}}/");
testSame("/{\\{0}}/");
testSame("/{\\{0?}}/");
}
public final void testMoreCharsets() {
test("var a = /[\\x00\\x22\\x26\\x27\\x3c\\x3e]/g",
"var a = /[\\0\"&'<>]/g");
test("var b = /[\\x00\\x22\\x27\\x3c\\x3e]/g",
"var b = /[\\0\"'<>]/g");
test("var c = /[\\x00\\x09-\\x0d \\x22\\x26\\x27\\x2d\\/\\x3c-\\x3e`"
+ "\\x85\\xa0\\u2028\\u2029]/g",
"var c = /[\\0\\t-\\r \"&'/<->`\\x85\\xa0\\u2028\\u2029-]/g");
test("var d = /[\\x00\\x09-\\x0d \\x22\\x27\\x2d\\/\\x3c-\\x3e`"
+ "\\x85\\xa0\\u2028\\u2029]/g",
"var d = /[\\0\\t-\\r \"'/<->`\\x85\\xa0\\u2028\\u2029-]/g");
test("var e = /[\\x00\\x08-\\x0d\\x22\\x26\\x27\\/\\x3c-\\x3e\\\\"
+ "\\x85\\u2028\\u2029]/g",
"var e = /[\\0\\b-\\r\"&'/<->\\\\\\x85\\u2028\\u2029]/g");
test("var f = /[\\x00\\x08-\\x0d\\x22\\x24\\x26-\\/\\x3a\\x3c-\\x3f"
+ "\\x5b-\\x5e\\x7b-\\x7d\\x85\\u2028\\u2029]/g",
"var f = /[\\0\\b-\\r\"$&-/:<-?[-^{-}\\x85\\u2028\\u2029]/g");
test("var g = /[\\x00\\x08-\\x0d\\x22\\x26-\\x2a\\/\\x3a-\\x3e@\\\\"
+ "\\x7b\\x7d\\x85\\xa0\\u2028\\u2029]/g",
"var g = /[\\0\\b-\\r\"&-*/:->@\\\\{}\\x85\\xa0\\u2028\\u2029]/g");
test("var h = /^(?!-*(?:expression|(?:moz-)?binding))(?:[.#]?-?"
+ "(?:[_a-z0-9][_a-z0-9-]*)(?:-[_a-z][_a-z0-9-]*)*-?|-?"
+ "(?:[0-9]+(?:\\.[0-9]*)?|\\.[0-9])(?:[a-z]{1,2}|%)?|!important|)$/i",
"var h = /^(?!-*(?:expression|(?:moz-)?binding))(?:[#.]?-?"
+ "\\w[\\w-]*(?:-[_a-z][\\w-]*)*-?|-?"
+ "(?:\\d+(?:\\.\\d*)?|\\.\\d)(?:[a-z]{1,2}|%)?|!important|)$/i");
test("var i = /^(?:(?:https?|mailto):|[^&:\\/?#]*(?:[\\/?#]|$))/i",
"var i = /^(?:(?:https?|mailto):|[^#&/:?]*(?:[#/?]|$))/i");
test("var j = /^(?!style|on|action|archive|background|cite|classid"
+ "|codebase|data|dsync|href|longdesc|src|usemap)(?:[a-z0-9_$:-]*"
+ "|dir=(?:ltr|rtl))$/i",
"var j = /^(?!style|on|action|archive|background|cite|classid"
+ "|codebase|data|dsync|href|longdesc|src|usemap)(?:[\\w$:-]*"
+ "|dir=(?:ltr|rtl))$/i");
test("var k = /^(?!script|style|title|textarea|xmp|no)[a-z0-9_$:-]*$/i",
"var k = /^(?!script|style|title|textarea|xmp|no)[\\w$:-]*$/i");
test("var l = /<(?:!|\\/?[a-z])(?:[^>'\"]|\"[^\"]*\"|'[^']*')*>/gi",
"var l = /<(?:!|\\/?[a-z])(?:[^\"'>]|\"[^\"]*\"|'[^']*')*>/gi");
}
public final void testMoreRegularExpression() {
testSame("/\"/");
testSame("/'/");
test("/(?:[^<\\/\"'\\s\\\\]|<(?!\\/script))+/i",
"/(?:[^\\s\"'/<\\\\]|<(?!\\/script))+/i");
testSame("/-->/");
testSame("/<!--/");
testSame("/<\\/(\\w+)\\b/");
testSame("/<\\/?/");
test("/<script(?=[\\s>\\/]|$)/i", "/<script(?=[\\s/>]|$)/i");
test("/<style(?=[\\s>\\/]|$)/i", "/<style(?=[\\s/>]|$)/i");
test("/<textarea(?=[\\s>\\/]|$)/i", "/<textarea(?=[\\s/>]|$)/i");
test("/<title(?=[\\s>\\/]|$)/i", "/<title(?=[\\s/>]|$)/i");
test("/<xmp(?=[\\s>\\/]|$)/i", "/<xmp(?=[\\s/>]|$)/i");
testSame("/[\"']/");
test("/[\\\\)\\s]/", "/[\\s)\\\\]/");
test("/[\\f\\r\\n\\u2028\\u2029]/", "/[\\n\\f\\r\\u2028\\u2029]/");
test("/[\\n\\r\\f]/", "/[\\n\\f\\r]/");
testSame("/\\*\\//");
testSame("/\\//");
testSame("/\\/\\*/");
testSame("/\\/\\//");
testSame("/\\\\(?:\\r\\n?|[\\n\\f\"])/");
testSame("/\\\\(?:\\r\\n?|[\\n\\f'])/");
testSame("/\\burl\\s*\\(\\s*([\"']?)/i");
testSame("/\\s+/");
test("/^(?:[^'\\\\\\n\\r\\u2028\\u2029<]|\\\\(?:\\r\\n?|[^\\r<]"
+ "|<(?!\\/script))|<(?!\\/script))/i",
"/^(?:[^\\n\\r'<\\\\\\u2028\\u2029]|\\\\(?:\\r\\n?|[^\\r<]"
+ "|<(?!\\/script))|<(?!\\/script))/i");
test("/^(?:[^\\\"\\\\\\n\\r\\u2028\\u2029<]|\\\\(?:\\r\\n?"
+ "|[^\\r<]|<(?!\\/script))|<(?!\\/script))/i",
"/^(?:[^\\n\\r\"<\\\\\\u2028\\u2029]|\\\\(?:\\r\\n?"
+ "|[^\\r<]|<(?!\\/script))|<(?!\\/script))/i");
test("/^(?:[^\\[\\\\\\/<\\n\\r\\u2028\\u2029]|\\\\[^\\n\\r\\u2028\\u2029]"
+ "|\\\\?<(?!\\/script)|\\[(?:[^\\]\\\\<\\n\\r\\u2028\\u2029]|"
+ "\\\\(?:[^\\n\\r\\u2028\\u2029]))*|\\\\?<(?!\\/script)\\])/i",
"/^(?:[^\\n\\r/<[\\\\\\u2028\\u2029]|\\\\."
+ "|\\\\?<(?!\\/script)|\\[(?:[^\\n\\r<\\\\\\]\\u2028\\u2029]|"
+ "\\\\.)*|\\\\?<(?!\\/script)])/i");
testSame("/^(?=>|\\s+[\\w-]+\\s*=)/");
test("/^(?=[\\/\\s>])/", "/^(?=[\\s/>])/");
test("/^(?=[^\"'\\s>])/", "/^(?=[^\\s\"'>])/");
testSame("/^/");
testSame("/^[^<]+/");
test("/^[a-z0-9:-]*(?:[a-z0-9]|$)/i", "/^[\\d:a-z-]*(?:[^\\W_]|$)/i");
testSame("/^[a-z]+/i");
testSame("/^\\s*\"/");
testSame("/^\\s*'/");
testSame("/^\\s*([a-z][\\w-]*)/i");
testSame("/^\\s*=/");
testSame("/^\\s*\\/?>/");
testSame("/^\\s+$/");
testSame("/^\\s+/");
}
public final void testPrecedence() {
// Repetition binds more tightly than concatenation.
testSame("/ab?/");
testSame("/(?:ab)?/");
// Concatenation bind more tightly than alterations.
testSame("/foo|bar/");
testSame("/f(?:oo|ba)r/");
}
public final void testMalformedRegularExpressions() {
test(
"/(?<!foo)/", "/(?<!foo)/", // Lookbehind not valid in ES.
null, // No error.
CheckRegExp.MALFORMED_REGEXP); // Warning.
test(
"/(/", "/(/",
null, // No error.
CheckRegExp.MALFORMED_REGEXP); // Warning.
test(
"/)/", "/)/",
null, // No error.
CheckRegExp.MALFORMED_REGEXP); // Warning.
test(
"/\\uabc/", "/\\uabc/",
null, // No error.
CheckRegExp.MALFORMED_REGEXP); // Warning.
test(
"/\\uabcg/", "/\\uabcg/",
null, // No error.
CheckRegExp.MALFORMED_REGEXP); // Warning.
}
@Override
protected CompilerPass getProcessor(Compiler compiler) {
final CompilerPass simplifier = new PeepholeOptimizationsPass(
compiler, new PeepholeSimplifyRegExp());
final CompilerPass checker = new CheckRegExp(compiler);
return new CompilerPass() {
@Override
public void process(Node externs, Node root) {
checker.process(externs, root);
simplifier.process(externs, root);
}
};
}
}