regexp_object.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *  Copyright (C) 2003 Apple Computer, Inc.
00006  *
00007  *  This library is free software; you can redistribute it and/or
00008  *  modify it under the terms of the GNU Lesser General Public
00009  *  License as published by the Free Software Foundation; either
00010  *  version 2 of the License, or (at your option) any later version.
00011  *
00012  *  This library is distributed in the hope that it will be useful,
00013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  *  Lesser General Public License for more details.
00016  *
00017  *  You should have received a copy of the GNU Lesser General Public
00018  *  License along with this library; if not, write to the Free Software
00019  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00020  *
00021  */
00022 
00023 #include <stdio.h>
00024 
00025 #include "value.h"
00026 #include "object.h"
00027 #include "types.h"
00028 #include "interpreter.h"
00029 #include "operations.h"
00030 #include "internal.h"
00031 #include "regexp.h"
00032 #include "regexp_object.h"
00033 #include "error_object.h"
00034 #include "lookup.h"
00035 
00036 using namespace KJS;
00037 
00038 // ------------------------------ RegExpPrototypeImp ---------------------------
00039 
00040 // ECMA 15.10.6
00041 
00042 const ClassInfo RegExpPrototypeImp::info = {"RegExp", 0, 0, 0};
00043 
00044 RegExpPrototypeImp::RegExpPrototypeImp(ExecState *exec,
00045                                        ObjectPrototypeImp *objProto,
00046                                        FunctionPrototypeImp *funcProto)
00047   : ObjectImp(objProto)
00048 {
00049   Value protect(this);
00050   setInternalValue(String(""));
00051 
00052   // The constructor will be added later in RegExpObject's constructor (?)
00053 
00054   static const Identifier execPropertyName("exec");
00055   putDirect(execPropertyName,
00056         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Exec,     0, execPropertyName), DontEnum);
00057   static const Identifier testPropertyName("test");
00058   putDirect(testPropertyName,
00059         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Test,     0, testPropertyName), DontEnum);
00060   putDirect(toStringPropertyName,
00061         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::ToString, 0, toStringPropertyName), DontEnum);
00062   static const Identifier compilePropertyName("compile");
00063   putDirect(compilePropertyName,
00064             new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Compile,  1, compilePropertyName), DontEnum);
00065 }
00066 
00067 // ------------------------------ RegExpProtoFuncImp ---------------------------
00068 
00069 RegExpProtoFuncImp::RegExpProtoFuncImp(ExecState * /*exec*/, FunctionPrototypeImp *funcProto,
00070                                        int i, int len, const Identifier &_ident)
00071   : InternalFunctionImp(funcProto), id(i)
00072 {
00073   Value protect(this);
00074   putDirect(lengthPropertyName, len, DontDelete|ReadOnly|DontEnum);
00075   ident = _ident;
00076 }
00077 
00078 bool RegExpProtoFuncImp::implementsCall() const
00079 {
00080   return true;
00081 }
00082 
00083 Value RegExpProtoFuncImp::call(ExecState *exec, Object &thisObj, const List &args)
00084 {
00085   if (!thisObj.inherits(&RegExpImp::info)) {
00086     if (thisObj.inherits(&RegExpPrototypeImp::info)) {
00087       switch (id) {
00088         case ToString: return String("//"); // FireFox returns /(?:)/
00089       }
00090     }
00091     Object err = Error::create(exec,TypeError);
00092     exec->setException(err);
00093     return err;
00094   }
00095 
00096   RegExpImp *reimp = static_cast<RegExpImp*>(thisObj.imp());
00097   RegExp *re = reimp->regExp();
00098   String s;
00099   UString str;
00100   switch (id) {
00101   case Exec:      // 15.10.6.2
00102   case Test:
00103   {
00104     s = args[0].toString(exec);
00105     int length = s.value().size();
00106 
00107     // Get values from the last time (in case of /g)
00108     Value lastIndex = thisObj.get(exec,"lastIndex");
00109     int i = lastIndex.isValid() ? lastIndex.toInt32(exec) : 0;
00110     bool globalFlag = thisObj.get(exec,"global").toBoolean(exec);
00111     if (!globalFlag)
00112       i = 0;
00113     if (i < 0 || i > length) {
00114       thisObj.put(exec,"lastIndex", Number(0), DontDelete | DontEnum);
00115       if (id == Test)
00116         return Boolean(false);
00117       else
00118         return Null();
00119     }
00120     RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp().imp());
00121     int **ovector = regExpObj->registerRegexp( re, s.value() );
00122 
00123     re->prepareMatch(s.value());
00124     str = re->match(s.value(), i, 0L, ovector);
00125     re->doneMatch();
00126     regExpObj->setSubPatterns(re->subPatterns());
00127 
00128     if (id == Test)
00129       return Boolean(!str.isNull());
00130 
00131     if (str.isNull()) // no match
00132     {
00133       if (globalFlag)
00134         thisObj.put(exec,"lastIndex",Number(0), DontDelete | DontEnum);
00135       return Null();
00136     }
00137     else // success
00138     {
00139       if (globalFlag)
00140         thisObj.put(exec,"lastIndex",Number( (*ovector)[1] ), DontDelete | DontEnum);
00141       return regExpObj->arrayOfMatches(exec,str);
00142     }
00143   }
00144   break;
00145   case ToString:
00146     s = thisObj.get(exec,"source").toString(exec);
00147     str = "/";
00148     str += s.value();
00149     str += "/";
00150     if (thisObj.get(exec,"global").toBoolean(exec)) {
00151       str += "g";
00152     }
00153     if (thisObj.get(exec,"ignoreCase").toBoolean(exec)) {
00154       str += "i";
00155     }
00156     if (thisObj.get(exec,"multiline").toBoolean(exec)) {
00157       str += "m";
00158     }
00159     return String(str);
00160   case Compile: {
00161       RegExp* newEngine = RegExpObjectImp::makeEngine(exec, args[0].toString(exec), args[1]);
00162       if (!newEngine)
00163         return exec->exception();
00164       reimp->setRegExp(newEngine);
00165       return Value(reimp);
00166     }
00167   }
00168   
00169 
00170   return Undefined();
00171 }
00172 
00173 // ------------------------------ RegExpImp ------------------------------------
00174 
00175 const ClassInfo RegExpImp::info = {"RegExp", 0, 0, 0};
00176 
00177 RegExpImp::RegExpImp(RegExpPrototypeImp *regexpProto)
00178   : ObjectImp(regexpProto), reg(0L)
00179 {
00180 }
00181 
00182 RegExpImp::~RegExpImp()
00183 {
00184   delete reg;
00185 }
00186 
00187 void RegExpImp::setRegExp(RegExp *r)
00188 {
00189   delete reg;
00190   reg = r;
00191 
00192   Object protect(this);//Protect self from GC (we are allocating a StringImp, and may be new)
00193   putDirect("global", (r->flags() & RegExp::Global) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 
00194             DontDelete | ReadOnly | DontEnum);
00195   putDirect("ignoreCase", (r->flags() & RegExp::IgnoreCase) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 
00196             DontDelete | ReadOnly | DontEnum);
00197   putDirect("multiline", (r->flags() & RegExp::Multiline) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 
00198             DontDelete | ReadOnly | DontEnum);
00199 
00200   putDirect("source", new StringImp(r->pattern()), DontDelete | ReadOnly | DontEnum);
00201   putDirect("lastIndex", NumberImp::zero(), DontDelete | DontEnum);
00202 }
00203 
00204 // ------------------------------ RegExpObjectImp ------------------------------
00205 
00206 RegExpObjectImp::RegExpObjectImp(ExecState * /*exec*/,
00207                                  FunctionPrototypeImp *funcProto,
00208                                  RegExpPrototypeImp *regProto)
00209 
00210   : InternalFunctionImp(funcProto), lastOvector(0L), lastNrSubPatterns(0)
00211 {
00212   Value protect(this);
00213   // ECMA 15.10.5.1 RegExp.prototype
00214   putDirect(prototypePropertyName, regProto, DontEnum|DontDelete|ReadOnly);
00215 
00216   // no. of arguments for constructor
00217   putDirect(lengthPropertyName, NumberImp::two(), ReadOnly|DontDelete|DontEnum);
00218 }
00219 
00220 RegExpObjectImp::~RegExpObjectImp()
00221 {
00222   delete [] lastOvector;
00223 }
00224 
00225 int **RegExpObjectImp::registerRegexp( const RegExp* re, const UString& s )
00226 {
00227   lastString = s;
00228   delete [] lastOvector;
00229   lastOvector = 0;
00230   lastNrSubPatterns = re->subPatterns();
00231   return &lastOvector;
00232 }
00233 
00234 Object RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const
00235 {
00236   List list;
00237   // The returned array contains 'result' as first item, followed by the list of matches
00238   list.append(String(result));
00239   if ( lastOvector )
00240     for ( unsigned int i = 1 ; i < lastNrSubPatterns + 1 ; ++i )
00241     {
00242       UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
00243       list.append(String(substring));
00244     }
00245   Object arr = exec->lexicalInterpreter()->builtinArray().construct(exec, list);
00246   arr.put(exec, "index", Number(lastOvector[0]));
00247   arr.put(exec, "input", String(lastString));
00248   return arr;
00249 }
00250 
00251 Value RegExpObjectImp::get(ExecState *exec, const Identifier &p) const
00252 {
00253   UString s = p.ustring();
00254   if (s[0] == '$' && lastOvector)
00255   {
00256     bool ok;
00257     unsigned long i = s.substr(1).toULong(&ok);
00258     if (ok)
00259     {
00260       if (i < lastNrSubPatterns + 1)
00261       {
00262         UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
00263         return String(substring);
00264       }
00265       return String("");
00266     }
00267   }
00268   return InternalFunctionImp::get(exec, p);
00269 }
00270 
00271 bool RegExpObjectImp::hasProperty(ExecState *exec, const Identifier &p) const
00272 {
00273   UString s = p.ustring();
00274   if (s[0] == '$' && lastOvector) {
00275     bool ok;
00276     (void)s.substr(1).toULong(&ok);
00277     if (ok)
00278       return true;
00279   }
00280 
00281   return InternalFunctionImp::hasProperty(exec, p);
00282 }
00283 
00284 bool RegExpObjectImp::implementsConstruct() const
00285 {
00286   return true;
00287 }
00288 
00289 RegExp* RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, const Value &flagsInput)
00290 {
00291   UString flags = flagsInput.type() == UndefinedType ? UString("") : flagsInput.toString(exec);
00292 
00293   // Check for validity of flags
00294   for (int pos = 0; pos < flags.size(); ++pos) {
00295     switch (flags[pos].unicode()) {
00296     case 'g':
00297     case 'i':
00298     case 'm':
00299       break;
00300     default: {
00301         Object err = Error::create(exec, SyntaxError,
00302                     "Invalid regular expression flags");
00303         exec->setException(err);
00304         return 0;
00305       }
00306     }
00307   }
00308 
00309   bool global = (flags.find("g") >= 0);
00310   bool ignoreCase = (flags.find("i") >= 0);
00311   bool multiline = (flags.find("m") >= 0);
00312 
00313   int reflags = RegExp::None;
00314   if (global)
00315       reflags |= RegExp::Global;
00316   if (ignoreCase)
00317       reflags |= RegExp::IgnoreCase;
00318   if (multiline)
00319       reflags |= RegExp::Multiline;
00320 
00321   RegExp *re = new RegExp(p, reflags);
00322   if (!re->isValid()) {
00323     Object err = Error::create(exec, SyntaxError,
00324                                "Invalid regular expression");
00325     exec->setException(err);
00326     delete re;
00327     return 0;
00328   }
00329   return re;
00330 }
00331 
00332 // ECMA 15.10.4
00333 Object RegExpObjectImp::construct(ExecState *exec, const List &args)
00334 {
00335   UString p;
00336   if (args.isEmpty()) {
00337       p = "";
00338   } else {
00339     Value a0 = args[0];
00340     if (a0.isA(ObjectType) && a0.toObject(exec).inherits(&RegExpImp::info)) {
00341       // It's a regexp. Check that no flags were passed.
00342       if (args.size() > 1 && args[1].type() != UndefinedType) {
00343           Object err = Error::create(exec,TypeError);
00344           exec->setException(err);
00345           return err;
00346       }
00347       RegExpImp *rimp = static_cast<RegExpImp*>(Object::dynamicCast(a0).imp());
00348       p = rimp->regExp()->pattern();
00349     } else {
00350       p = a0.toString(exec);
00351     }
00352   }
00353 
00354   RegExp* re = makeEngine(exec, p, args[1]);
00355   if (!re)
00356     return exec->exception().toObject(exec);
00357 
00358   RegExpPrototypeImp *proto = static_cast<RegExpPrototypeImp*>(exec->lexicalInterpreter()->builtinRegExpPrototype().imp());
00359   RegExpImp *dat = new RegExpImp(proto);
00360   Object obj(dat); // protect from GC
00361   dat->setRegExp(re);
00362 
00363   return obj;
00364 }
00365 
00366 bool RegExpObjectImp::implementsCall() const
00367 {
00368   return true;
00369 }
00370 
00371 // ECMA 15.10.3
00372 Value RegExpObjectImp::call(ExecState *exec, Object &/*thisObj*/,
00373                 const List &args)
00374 {
00375   // TODO: handle RegExp argument case (15.10.3.1)
00376 
00377   return construct(exec, args);
00378 }
KDE Home | KDE Accessibility Home | Description of Access Keys