CaRMtl/org/mozilla/javascript/regexp/RegExpImpl.java

542 lines
20 KiB
Java
Raw Permalink Normal View History

/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Rhino code, released
* May 6, 1998.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1997-1999
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License Version 2 or later (the "GPL"), in which
* case the provisions of the GPL are applicable instead of those above. If
* you wish to allow use of your version of this file only under the terms of
* the GPL and not to allow others to use your version of this file under the
* MPL, indicate your decision by deleting the provisions above and replacing
* them with the notice and other provisions required by the GPL. If you do
* not delete the provisions above, a recipient may use your version of this
* file under either the MPL or the GPL.
*
* ***** END LICENSE BLOCK ***** */
package org.mozilla.javascript.regexp;
import org.mozilla.javascript.*;
/**
*
*/
public class RegExpImpl implements RegExpProxy {
public boolean isRegExp(Scriptable obj) {
return obj instanceof NativeRegExp;
}
public Object compileRegExp(Context cx, String source, String flags)
{
return NativeRegExp.compileRE(cx, source, flags, false);
}
public Scriptable wrapRegExp(Context cx, Scriptable scope,
Object compiled)
{
return new NativeRegExp(scope, compiled);
}
public Object action(Context cx, Scriptable scope,
Scriptable thisObj, Object[] args,
int actionType)
{
GlobData data = new GlobData();
data.mode = actionType;
switch (actionType) {
case RA_MATCH:
{
Object rval;
data.optarg = 1;
rval = matchOrReplace(cx, scope, thisObj, args,
this, data, false);
return data.arrayobj == null ? rval : data.arrayobj;
}
case RA_SEARCH:
data.optarg = 1;
return matchOrReplace(cx, scope, thisObj, args,
this, data, false);
case RA_REPLACE:
{
Object arg1 = args.length < 2 ? Undefined.instance : args[1];
String repstr = null;
Function lambda = null;
if (arg1 instanceof Function) {
lambda = (Function) arg1;
} else {
repstr = ScriptRuntime.toString(arg1);
}
data.optarg = 2;
data.lambda = lambda;
data.repstr = repstr;
data.dollar = repstr == null ? -1 : repstr.indexOf('$');
data.charBuf = null;
data.leftIndex = 0;
Object val = matchOrReplace(cx, scope, thisObj, args,
this, data, true);
SubString rc = this.rightContext;
if (data.charBuf == null) {
if (data.global || val == null
|| !val.equals(Boolean.TRUE))
{
/* Didn't match even once. */
return data.str;
}
SubString lc = this.leftContext;
replace_glob(data, cx, scope, this, lc.index, lc.length);
}
data.charBuf.append(rc.charArray, rc.index, rc.length);
return data.charBuf.toString();
}
default:
throw Kit.codeBug();
}
}
/**
* Analog of C match_or_replace.
*/
private static Object matchOrReplace(Context cx, Scriptable scope,
Scriptable thisObj, Object[] args,
RegExpImpl reImpl,
GlobData data, boolean forceFlat)
{
NativeRegExp re;
String str = ScriptRuntime.toString(thisObj);
data.str = str;
Scriptable topScope = ScriptableObject.getTopLevelScope(scope);
if (args.length == 0) {
Object compiled = NativeRegExp.compileRE(cx, "", "", false);
re = new NativeRegExp(topScope, compiled);
} else if (args[0] instanceof NativeRegExp) {
re = (NativeRegExp) args[0];
} else {
String src = ScriptRuntime.toString(args[0]);
String opt;
if (data.optarg < args.length) {
args[0] = src;
opt = ScriptRuntime.toString(args[data.optarg]);
} else {
opt = null;
}
Object compiled = NativeRegExp.compileRE(cx, src, opt, forceFlat);
re = new NativeRegExp(topScope, compiled);
}
data.regexp = re;
data.global = (re.getFlags() & NativeRegExp.JSREG_GLOB) != 0;
int[] indexp = { 0 };
Object result = null;
if (data.mode == RA_SEARCH) {
result = re.executeRegExp(cx, scope, reImpl,
str, indexp, NativeRegExp.TEST);
if (result != null && result.equals(Boolean.TRUE))
result = new Integer(reImpl.leftContext.length);
else
result = new Integer(-1);
} else if (data.global) {
re.lastIndex = 0;
for (int count = 0; indexp[0] <= str.length(); count++) {
result = re.executeRegExp(cx, scope, reImpl,
str, indexp, NativeRegExp.TEST);
if (result == null || !result.equals(Boolean.TRUE))
break;
if (data.mode == RA_MATCH) {
match_glob(data, cx, scope, count, reImpl);
} else {
if (data.mode != RA_REPLACE) Kit.codeBug();
SubString lastMatch = reImpl.lastMatch;
int leftIndex = data.leftIndex;
int leftlen = lastMatch.index - leftIndex;
data.leftIndex = lastMatch.index + lastMatch.length;
replace_glob(data, cx, scope, reImpl, leftIndex, leftlen);
}
if (reImpl.lastMatch.length == 0) {
if (indexp[0] == str.length())
break;
indexp[0]++;
}
}
} else {
result = re.executeRegExp(cx, scope, reImpl, str, indexp,
((data.mode == RA_REPLACE)
? NativeRegExp.TEST
: NativeRegExp.MATCH));
}
return result;
}
public int find_split(Context cx, Scriptable scope, String target,
String separator, Scriptable reObj,
int[] ip, int[] matchlen,
boolean[] matched, String[][] parensp)
{
int i = ip[0];
int length = target.length();
int result;
int version = cx.getLanguageVersion();
NativeRegExp re = (NativeRegExp) reObj;
again:
while (true) { // imitating C label
/* JS1.2 deviated from Perl by never matching at end of string. */
int ipsave = ip[0]; // reuse ip to save object creation
ip[0] = i;
Object ret = re.executeRegExp(cx, scope, this, target, ip,
NativeRegExp.TEST);
if (ret != Boolean.TRUE) {
// Mismatch: ensure our caller advances i past end of string.
ip[0] = ipsave;
matchlen[0] = 1;
matched[0] = false;
return length;
}
i = ip[0];
ip[0] = ipsave;
matched[0] = true;
SubString sep = this.lastMatch;
matchlen[0] = sep.length;
if (matchlen[0] == 0) {
/*
* Empty string match: never split on an empty
* match at the start of a find_split cycle. Same
* rule as for an empty global match in
* match_or_replace.
*/
if (i == ip[0]) {
/*
* "Bump-along" to avoid sticking at an empty
* match, but don't bump past end of string --
* our caller must do that by adding
* sep->length to our return value.
*/
if (i == length) {
if (version == Context.VERSION_1_2) {
matchlen[0] = 1;
result = i;
}
else
result = -1;
break;
}
i++;
continue again; // imitating C goto
}
}
// PR_ASSERT((size_t)i >= sep->length);
result = i - matchlen[0];
break;
}
int size = (parens == null) ? 0 : parens.length;
parensp[0] = new String[size];
for (int num = 0; num < size; num++) {
SubString parsub = getParenSubString(num);
parensp[0][num] = parsub.toString();
}
return result;
}
/**
* Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h.
* Assumes zero-based; i.e., for $3, i==2
*/
SubString getParenSubString(int i)
{
if (parens != null && i < parens.length) {
SubString parsub = parens[i];
if (parsub != null) {
return parsub;
}
}
return SubString.emptySubString;
}
/*
* Analog of match_glob() in jsstr.c
*/
private static void match_glob(GlobData mdata, Context cx,
Scriptable scope, int count,
RegExpImpl reImpl)
{
if (mdata.arrayobj == null) {
Scriptable s = ScriptableObject.getTopLevelScope(scope);
mdata.arrayobj = ScriptRuntime.newObject(cx, s, "Array", null);
}
SubString matchsub = reImpl.lastMatch;
String matchstr = matchsub.toString();
mdata.arrayobj.put(count, mdata.arrayobj, matchstr);
}
/*
* Analog of replace_glob() in jsstr.c
*/
private static void replace_glob(GlobData rdata, Context cx,
Scriptable scope, RegExpImpl reImpl,
int leftIndex, int leftlen)
{
int replen;
String lambdaStr;
if (rdata.lambda != null) {
// invoke lambda function with args lastMatch, $1, $2, ... $n,
// leftContext.length, whole string.
SubString[] parens = reImpl.parens;
int parenCount = (parens == null) ? 0 : parens.length;
Object[] args = new Object[parenCount + 3];
args[0] = reImpl.lastMatch.toString();
for (int i=0; i < parenCount; i++) {
SubString sub = parens[i];
if (sub != null) {
args[i+1] = sub.toString();
} else {
args[i+1] = Undefined.instance;
}
}
args[parenCount+1] = new Integer(reImpl.leftContext.length);
args[parenCount+2] = rdata.str;
// This is a hack to prevent expose of reImpl data to
// JS function which can run new regexps modifing
// regexp that are used later by the engine.
// TODO: redesign is necessary
if (reImpl != ScriptRuntime.getRegExpProxy(cx)) Kit.codeBug();
RegExpImpl re2 = new RegExpImpl();
re2.multiline = reImpl.multiline;
re2.input = reImpl.input;
ScriptRuntime.setRegExpProxy(cx, re2);
try {
Scriptable parent = ScriptableObject.getTopLevelScope(scope);
Object result = rdata.lambda.call(cx, parent, parent, args);
lambdaStr = ScriptRuntime.toString(result);
} finally {
ScriptRuntime.setRegExpProxy(cx, reImpl);
}
replen = lambdaStr.length();
} else {
lambdaStr = null;
replen = rdata.repstr.length();
if (rdata.dollar >= 0) {
int[] skip = new int[1];
int dp = rdata.dollar;
do {
SubString sub = interpretDollar(cx, reImpl, rdata.repstr,
dp, skip);
if (sub != null) {
replen += sub.length - skip[0];
dp += skip[0];
} else {
++dp;
}
dp = rdata.repstr.indexOf('$', dp);
} while (dp >= 0);
}
}
int growth = leftlen + replen + reImpl.rightContext.length;
StringBuffer charBuf = rdata.charBuf;
if (charBuf == null) {
charBuf = new StringBuffer(growth);
rdata.charBuf = charBuf;
} else {
charBuf.ensureCapacity(rdata.charBuf.length() + growth);
}
charBuf.append(reImpl.leftContext.charArray, leftIndex, leftlen);
if (rdata.lambda != null) {
charBuf.append(lambdaStr);
} else {
do_replace(rdata, cx, reImpl);
}
}
private static SubString interpretDollar(Context cx, RegExpImpl res,
String da, int dp, int[] skip)
{
char dc;
int num, tmp;
if (da.charAt(dp) != '$') Kit.codeBug();
/* Allow a real backslash (literal "\\") to escape "$1" etc. */
int version = cx.getLanguageVersion();
if (version != Context.VERSION_DEFAULT
&& version <= Context.VERSION_1_4)
{
if (dp > 0 && da.charAt(dp - 1) == '\\')
return null;
}
int daL = da.length();
if (dp + 1 >= daL)
return null;
/* Interpret all Perl match-induced dollar variables. */
dc = da.charAt(dp + 1);
if (NativeRegExp.isDigit(dc)) {
int cp;
if (version != Context.VERSION_DEFAULT
&& version <= Context.VERSION_1_4)
{
if (dc == '0')
return null;
/* Check for overflow to avoid gobbling arbitrary decimal digits. */
num = 0;
cp = dp;
while (++cp < daL && NativeRegExp.isDigit(dc = da.charAt(cp)))
{
tmp = 10 * num + (dc - '0');
if (tmp < num)
break;
num = tmp;
}
}
else { /* ECMA 3, 1-9 or 01-99 */
int parenCount = (res.parens == null) ? 0 : res.parens.length;
num = dc - '0';
if (num > parenCount)
return null;
cp = dp + 2;
if ((dp + 2) < daL) {
dc = da.charAt(dp + 2);
if (NativeRegExp.isDigit(dc)) {
tmp = 10 * num + (dc - '0');
if (tmp <= parenCount) {
cp++;
num = tmp;
}
}
}
if (num == 0) return null; /* $0 or $00 is not valid */
}
/* Adjust num from 1 $n-origin to 0 array-index-origin. */
num--;
skip[0] = cp - dp;
return res.getParenSubString(num);
}
skip[0] = 2;
switch (dc) {
case '$':
return new SubString("$");
case '&':
return res.lastMatch;
case '+':
return res.lastParen;
case '`':
if (version == Context.VERSION_1_2) {
/*
* JS1.2 imitated the Perl4 bug where left context at each step
* in an iterative use of a global regexp started from last match,
* not from the start of the target string. But Perl4 does start
* $` at the beginning of the target string when it is used in a
* substitution, so we emulate that special case here.
*/
res.leftContext.index = 0;
res.leftContext.length = res.lastMatch.index;
}
return res.leftContext;
case '\'':
return res.rightContext;
}
return null;
}
/**
* Analog of do_replace in jsstr.c
*/
private static void do_replace(GlobData rdata, Context cx,
RegExpImpl regExpImpl)
{
StringBuffer charBuf = rdata.charBuf;
int cp = 0;
String da = rdata.repstr;
int dp = rdata.dollar;
if (dp != -1) {
int[] skip = new int[1];
do {
int len = dp - cp;
charBuf.append(da.substring(cp, dp));
cp = dp;
SubString sub = interpretDollar(cx, regExpImpl, da,
dp, skip);
if (sub != null) {
len = sub.length;
if (len > 0) {
charBuf.append(sub.charArray, sub.index, len);
}
cp += skip[0];
dp += skip[0];
} else {
++dp;
}
dp = da.indexOf('$', dp);
} while (dp >= 0);
}
int daL = da.length();
if (daL > cp) {
charBuf.append(da.substring(cp, daL));
}
}
String input; /* input string to match (perl $_, GC root) */
boolean multiline; /* whether input contains newlines (perl $*) */
SubString[] parens; /* Vector of SubString; last set of parens
matched (perl $1, $2) */
SubString lastMatch; /* last string matched (perl $&) */
SubString lastParen; /* last paren matched (perl $+) */
SubString leftContext; /* input to left of last match (perl $`) */
SubString rightContext; /* input to right of last match (perl $') */
}
final class GlobData
{
int mode; /* input: return index, match object, or void */
int optarg; /* input: index of optional flags argument */
boolean global; /* output: whether regexp was global */
String str; /* output: 'this' parameter object as string */
NativeRegExp regexp;/* output: regexp parameter object private data */
// match-specific data
Scriptable arrayobj;
// replace-specific data
Function lambda; /* replacement function object or null */
String repstr; /* replacement string */
int dollar = -1; /* -1 or index of first $ in repstr */
StringBuffer charBuf; /* result characters, null initially */
int leftIndex; /* leftContext index, always 0 for JS1.2 */
}