// Source note: forked from expo/troggle (372 lines, 15 KiB, JavaScript).
/*
Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
The copyrights embodied in the content of this file are licensed by
Yahoo! Inc. under the BSD (revised) open source license

@author Dan Vlad Dascalescu <dandv@yahoo-inc.com>


Parse function for PHP. Makes use of the tokenizer from tokenizephp.js.
Based on parsejavascript.js by Marijn Haverbeke.


Features:
 + special "deprecated" style for PHP4 keywords like 'var'
 + support for PHP 5.3 keywords: 'namespace', 'use'
 + 911 predefined constants, 1301 predefined functions, 105 predeclared classes
   from a typical PHP installation in a LAMP environment
 + new feature: syntax error flagging, thus enabling strict parsing of:
   + function definitions with explicitly or implicitly typed arguments and default values
   + modifiers (public, static etc.) applied to method and member definitions
   + foreach(array_expression as $key [=> $value]) loops
   + differentiation between single-quoted strings and double-quoted interpolating strings

*/


// Add the Array.indexOf method for JS engines that don't support it (e.g. IE).
// Code adapted from https://developer.mozilla.org/En/Core_JavaScript_1.5_Reference/Global_Objects/Array/IndexOf
if (!Array.prototype.indexOf)
{
  /**
   * Returns the first index at which `elt` is found (compared with ===),
   * or -1 when it is absent.
   *
   * @param elt   the value to look for
   * @param from  (optional, read from arguments[1]) index to start at;
   *              a negative value counts back from the end of the array
   * @returns {number} the matching index, or -1
   */
  Array.prototype.indexOf = function(elt /*, from*/)
  {
    // Coerce to an unsigned integer so array-likes with odd lengths behave.
    var len = this.length >>> 0;

    // Non-numeric / missing start positions fall back to 0.
    var from = Number(arguments[1]) || 0;
    // Truncate towards zero.
    from = (from < 0) ? Math.ceil(from) : Math.floor(from);
    if (from < 0)
      from += len;
    // A start position before the beginning means "search everything";
    // clamp so the loop does not walk over negative indices.
    if (from < 0)
      from = 0;

    for (; from < len; from++)
    {
      // 'in' skips holes in sparse arrays.
      if (from in this && this[from] === elt)
        return from;
    }
    return -1;
  };
}

// The PHP parser module. Exposes {make, electricChars}; `make` creates a
// parser iterator over a token stream produced by tokenizePHP (defined
// elsewhere). Relies on the global `indentUnit` for indentation widths.
var PHPParser = (function() {
  // Token types that can be considered to be atoms, part of operator expressions
  var atomicTypes = {
    "atom": true, "number": true, "variable": true, "string": true
  };

  // Constructor for the lexical context objects.
  function PHPLexical(indented, column, type, align, prev, info) {
    // indentation at start of this line
    this.indented = indented;
    // column at which this scope was opened
    this.column = column;
    // type of scope: "block" (top level), "stat" (statement), "form"
    // (special form), or the character that closes the scope: "}", ")", "]"
    this.type = type;
    // '[', '{', or '(' blocks that have any text after their opening
    // character are said to be 'aligned' -- any lines below are
    // indented all the way to the opening character.
    // When align is null/undefined the property is deliberately left
    // absent; next() fills it in once it knows what follows the opener.
    if (align != null)
      this.align = align;
    // Parent scope, if any.
    this.prev = prev;
    // Extra tag for the scope (this file only uses "switch").
    this.info = info;
  }

  // PHP indentation rules. Returns a function that, given the first
  // characters of a line, computes that line's indentation relative to the
  // lexical scope that was current at the preceding newline.
  function indentPHP(lexical) {
    return function(firstChars) {
      var firstChar = firstChars && firstChars.charAt(0), type = lexical.type;
      // Scope types for bracketed scopes are their closing character, so a
      // line starting with that character closes the scope.
      var closing = firstChar == type;
      if (type == "form" && firstChar == "{")
        return lexical.indented;
      else if (type == "stat" || type == "form")
        return lexical.indented + indentUnit;
      else if (lexical.info == "switch" && !closing)
        // case/default labels get one unit, their bodies two.
        return lexical.indented + (/^(?:case|default)\b/.test(firstChars) ? indentUnit : 2 * indentUnit);
      else if (lexical.align)
        return lexical.column - (closing ? 1 : 0);
      else
        return lexical.indented + (closing ? 0 : indentUnit);
    };
  }

  // The parser-iterator-producing function itself.
  //   input      - passed straight to tokenizePHP
  //   basecolumn - optional base indentation column (defaults to 0)
  // Returns an object with next() and copy() methods.
  function parsePHP(input, basecolumn) {
    // Wrap the input in a token stream
    var tokens = tokenizePHP(input);
    // The parser state. cc is a stack of actions that have to be
    // performed to finish the current statement. For example we might
    // know that we still need to find a closing parenthesis and a
    // semicolon. Actions at the end of the stack go first. It is
    // initialized with an infinitely looping action that consumes
    // whole statements.
    var cc = [statements];
    // The lexical scope, used mostly for indentation.
    var lexical = new PHPLexical((basecolumn || 0) - indentUnit, 0, "block", false);
    // Current column, and the indentation at the start of the current
    // line. Used to create lexical scope objects.
    var column = 0;
    var indented = 0;
    // Variables which are used by the mark, cont, and pass functions
    // below to communicate with the driver loop in the 'next' function.
    var consume, marked;

    // The iterator object.
    var parser = {next: next, copy: copy};

    // Parsing is accomplished by calling next() repeatedly; each call
    // returns the next token, possibly with an adjusted style and (for
    // newlines) an 'indentation' function attached.
    function next(){
      // Start by performing any 'lexical' actions (adjusting the
      // lexical variable), or the operations below will be working
      // with the wrong lexical state.
      while(cc[cc.length - 1].lex)
        cc.pop()();

      // Fetch the next token.
      var token = tokens.next();

      // Adjust column and indented.
      if (token.type == "whitespace" && column == 0)
        indented = token.value.length;
      column += token.value.length;
      if (token.content == "\n"){
        indented = column = 0;
        // If the lexical scope's align property is still undefined at
        // the end of the line, it is an un-aligned scope.
        if (!("align" in lexical))
          lexical.align = false;
        // Newline tokens get an indentation function associated with
        // them.
        token.indentation = indentPHP(lexical);
      }
      // No more processing for meaningless tokens.
      if (token.type == "whitespace" || token.type == "comment"
        || token.type == "string_not_terminated" )
        return token;
      // When a meaningful token is found and the lexical scope's
      // align is undefined, it is an aligned scope.
      if (!("align" in lexical))
        lexical.align = true;

      // Execute actions until one 'consumes' the token and we can
      // return it. 'marked' is used to change the style of the current token.
      while(true) {
        consume = marked = false;
        // Take and execute the topmost action.
        var action = cc.pop();
        action(token);

        if (consume){
          if (marked)
            token.style = marked;
          return token;
        }
      }
      // Never reached; kept from the original source.
      return 1; // Firebug workaround for http://code.google.com/p/fbug/issues/detail?id=1239#c1
    }

    // This makes a copy of the parser state. It stores all the
    // stateful variables in a closure, and returns a function that
    // will restore them when called with a new input stream. Note
    // that the cc array has to be copied, because it is constantly
    // being modified. Lexical objects are shared between runs of the
    // parser rather than copied.
    function copy(){
      var _lexical = lexical, _cc = cc.concat([]), _tokenState = tokens.state;

      return function copyParser(input){
        lexical = _lexical;
        cc = _cc.concat([]); // copies the array
        column = indented = 0;
        tokens = tokenizePHP(input, _tokenState);
        return parser;
      };
    }

    // Helper function for pushing a number of actions onto the cc
    // stack in reverse order, so that fs[0] ends up on top and runs first.
    function push(fs){
      for (var i = fs.length - 1; i >= 0; i--)
        cc.push(fs[i]);
    }
    // cont and pass are used by the action functions to add other
    // actions to the stack. cont will cause the current token to be
    // consumed, pass will leave it for the next action.
    function cont(){
      push(arguments);
      consume = true;
    }
    function pass(){
      push(arguments);
      consume = false;
    }
    // Used to change the style of the current token.
    function mark(style){
      marked = style;
    }
    // Add a layer of style to the current token, for example syntax-error.
    // When no style has been marked yet (marked is false/undefined) the new
    // style is used on its own instead of being glued onto "false" or
    // "undefined".
    function mark_add(style){
      marked = marked ? marked + ' ' + style : style;
    }

    // Push a new lexical context of the given type.
    function pushlex(type, info) {
      var result = function pushlexing() {
        lexical = new PHPLexical(indented, column, type, null, lexical, info);
      };
      result.lex = true;
      return result;
    }
    // Pop off the current lexical context.
    function poplex(){
      lexical = lexical.prev;
    }
    poplex.lex = true;
    // The 'lex' flag on these actions is used by the 'next' function
    // to know they can (and have to) be run before moving on to the
    // next token.

    // Creates an action that discards tokens until it finds one of
    // the given type. This will ignore (and recover from) syntax errors.
    function expect(wanted){
      // Declared by name so it can re-queue itself without
      // arguments.callee, which is unavailable in strict mode.
      function expecting(token){
        if (token.type == wanted) cont(); // consume the token
        else cont(expecting); // consume it and keep waiting
      }
      return expecting;
    }

    // Require a specific token type, or one of the tokens passed in the 'wanted' array
    // Used to detect blatant syntax errors. 'execute' is used to pass extra code
    // to be executed if the token is matched. For example, a '(' match could
    // 'execute' a cont( commasep(funcarg), require(")") )
    //   wanted  - a token type string, or an array of acceptable types
    //   execute - optional array parallel to 'wanted'; the function at the
    //             matched index (if any) is called with the token
    // Tokens that do not match are consumed and styled as "syntax-error".
    function require(wanted, execute){
      function requiring(token){
        var type = token.type;
        // Index of the matched alternative, or -1 when nothing matched.
        var ok;
        if (typeof(wanted) == "string")
          ok = (type == wanted) ? 0 : -1;
        else
          ok = wanted.indexOf(type);
        if (ok >= 0) {
          if (execute && typeof(execute[ok]) == "function")
            execute[ok](token);
          cont();  // just consume the token
        }
        else {
          // Keep the token's own style and layer the error style on top.
          if (!marked) mark(token.style);
          mark_add("syntax-error");
          cont(requiring); // keep requiring on the following token
        }
      }
      return requiring;
    }

    // Looks for a statement, and then calls itself.
    function statements(token){
      return pass(statement, statements);
    }
    // Dispatches various types of statements based on the type of the current token.
    function statement(token){
      var type = token.type;
      if (type == "keyword a") cont(pushlex("form"), expression, statement, poplex);
      else if (type == "keyword b") cont(pushlex("form"), statement, poplex);
      else if (type == "{") cont(pushlex("}"), block, poplex);
      else if (type == "function") funcdef();
      // technically, "class implode {...}" is correct, but we'll flag that as an error because it overrides a predefined function
      else if (type == "class") cont(require("t_string"), expect("{"), pushlex("}"), block, poplex);
      else if (type == "foreach") cont(pushlex("form"), require("("), pushlex(")"), expression, require("as"), require("variable"), /* => $value */ expect(")"), poplex, statement, poplex);
      else if (type == "for") cont(pushlex("form"), require("("), pushlex(")"), expression, require(";"), expression, require(";"), expression, require(")"), poplex, statement, poplex);
      // public final function foo(), protected static $bar;
      else if (type == "modifier") cont(require(["modifier", "variable", "function"], [null, null, funcdef]));
      else if (type == "switch") cont(pushlex("form"), require("("), expression, require(")"), pushlex("}", "switch"), require([":", "{"]), block, poplex, poplex);
      else if (type == "case") cont(expression, require(":"));
      else if (type == "default") cont(require(":"));
      else if (type == "catch") cont(pushlex("form"), require("("), require("t_string"), require("variable"), require(")"), statement, poplex);
      else if (type == "const") cont(require("t_string"));  // 'const static x=5' is a syntax error
      // technically, "namespace implode {...}" is correct, but we'll flag that as an error because it overrides a predefined function
      else if (type == "namespace") cont(namespacedef, require(";"));
      // $variables may be followed by operators, () for variable function calls, or [] subscripts
      else pass(pushlex("stat"), expression, require(";"), poplex);
    }
    // Dispatch expression types. Token types not listed here are left
    // untouched for the next action on the stack.
    function expression(token){
      var type = token.type;
      if (atomicTypes.hasOwnProperty(type)) cont(maybeoperator);
      else if (type == "<<<") cont(require("string"), maybeoperator);  // heredoc/nowdoc
      else if (type == "t_string") cont(maybe_double_colon, maybeoperator);
      else if (type == "keyword c") cont(expression);
      // function call or parenthesized expression: $a = ($b + 1) * 2;
      else if (type == "(") cont(pushlex(")"), commasep(expression), require(")"), poplex, maybeoperator);
      else if (type == "operator") cont(expression);
    }
    // Called for places where operators, function calls, or subscripts are
    // valid. Will skip on to the next action if none is found.
    function maybeoperator(token){
      var type = token.type;
      if (type == "operator") {
        if (token.content == "?") cont(expression, require(":"), expression);  // ternary operator
        else cont(expression);
      }
      else if (type == "(") cont(pushlex(")"), expression, commasep(expression), require(")"), poplex, maybeoperator /* $varfunc() + 3 */);
      // poplex is queued directly after the closing bracket (as in the "("
      // branch) so the "]" scope is closed before whatever follows is
      // indented; the trailing maybeoperator handles multidimensional
      // arrays, or $func[$i]()
      else if (type == "[") cont(pushlex("]"), expression, require("]"), poplex, maybeoperator);
    }
    // A regular use of the double colon to specify a class, as in self::func() or myclass::$var;
    // Differs from `namespace` or `use` in that only one class can be the parent; chains (A::B::$var) are a syntax error.
    function maybe_double_colon(token) {
      if (token.type == "t_double_colon")
        // A::$var, A::func(), A::const
        cont(require(["t_string", "variable"]), maybeoperator);
      else {
        // a t_string wasn't followed by ::, such as in a function call: foo()
        pass(expression);
      }
    }
    // the declaration or definition of a function
    function funcdef() {
      cont(require("t_string"), require("("), pushlex(")"), commasep(funcarg), require(")"), poplex, block);
    }
    // Parses a comma-separated list of the things that are recognized
    // by the 'what' argument.
    function commasep(what){
      // After each item: a comma means another item follows; anything
      // else ends the list without consuming the token.
      function proceed(token) {
        if (token.type == ",") cont(what, proceed);
      }
      return function commaSeparated() {
        pass(what, proceed);
      };
    }
    // Look for statements until a closing brace is found.
    function block(token) {
      if (token.type == "}") cont();
      else pass(statement, block);
    }
    // An optional "= <expression>" following a function parameter.
    function maybedefaultparameter(token){
      if (token.content == "=") cont(expression);
    }
    // support for default arguments: http://us.php.net/manual/en/functions.arguments.php#functions.arguments.default
    function funcarg(token){
      // function foo(myclass $obj) {...}
      if (token.type == "t_string") cont(require("variable"), maybedefaultparameter);
      // function foo($string) {...}
      else if (token.type == "variable") cont(maybedefaultparameter);
    }

    // A namespace definition or use: t_string segments chained by
    // t_double_colon tokens.
    function maybe_double_colon_def(token) {
      if (token.type == "t_double_colon")
        cont(namespacedef);
    }
    function namespacedef(token) {
      pass(require("t_string"), maybe_double_colon_def);
    }

    return parser;
  }

  return {make: parsePHP, electricChars: "{}:"};

})();

// Register the parser with the editor when it is present. Guarding the
// lookup lets this file also be loaded on its own (e.g. for testing)
// without throwing a ReferenceError.
if (typeof Editor != "undefined")
  Editor.Parser = PHPParser;