458 lines
11 KiB
Plaintext
458 lines
11 KiB
Plaintext
(function () {
|
|
var $P = Date.Parsing;
|
|
var _ = $P.Operators = {
|
|
//
|
|
// Tokenizers
|
|
//
|
|
rtoken: function (r) { // regex token
|
|
return function (s) {
|
|
var mx = s.match(r);
|
|
if (mx) {
|
|
return ([ mx[0], s.substring(mx[0].length) ]);
|
|
} else {
|
|
throw new $P.Exception(s);
|
|
}
|
|
};
|
|
},
|
|
token: function () { // whitespace-eating token
|
|
return function (s) {
|
|
return _.rtoken(new RegExp("^\\s*" + s + "\\s*"))(s);
|
|
};
|
|
},
|
|
stoken: function (s) { // string token
|
|
return _.rtoken(new RegExp("^" + s));
|
|
},
|
|
|
|
// Atomic Operators
|
|
|
|
until: function (p) {
|
|
return function (s) {
|
|
var qx = [], rx = null;
|
|
while (s.length) {
|
|
try {
|
|
rx = p.call(this, s);
|
|
} catch (e) {
|
|
qx.push(rx[0]);
|
|
s = rx[1];
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
return [ qx, s ];
|
|
};
|
|
},
|
|
many: function (p) {
|
|
return function (s) {
|
|
var rx = [], r = null;
|
|
while (s.length) {
|
|
try {
|
|
r = p.call(this, s);
|
|
} catch (e) {
|
|
return [ rx, s ];
|
|
}
|
|
rx.push(r[0]);
|
|
s = r[1];
|
|
}
|
|
return [ rx, s ];
|
|
};
|
|
},
|
|
|
|
// generator operators -- see below
|
|
optional: function (p) {
|
|
return function (s) {
|
|
var r = null;
|
|
try {
|
|
r = p.call(this, s);
|
|
} catch (e) {
|
|
return [ null, s ];
|
|
}
|
|
return [ r[0], r[1] ];
|
|
};
|
|
},
|
|
not: function (p) {
|
|
return function (s) {
|
|
try {
|
|
p.call(this, s);
|
|
} catch (e) {
|
|
return [null, s];
|
|
}
|
|
throw new $P.Exception(s);
|
|
};
|
|
},
|
|
ignore: function (p) {
|
|
return p ?
|
|
function (s) {
|
|
var r = null;
|
|
r = p.call(this, s);
|
|
return [null, r[1]];
|
|
} : null;
|
|
},
|
|
product: function () {
|
|
var px = arguments[0],
|
|
qx = Array.prototype.slice.call(arguments, 1), rx = [];
|
|
for (var i = 0 ; i < px.length ; i++) {
|
|
rx.push(_.each(px[i], qx));
|
|
}
|
|
return rx;
|
|
},
|
|
cache: function (rule) {
|
|
var cache = {}, cache_length = 0, cache_keys = [], CACHE_MAX = Date.Config.CACHE_MAX || 100000, r = null;
|
|
var cacheCheck = function () {
|
|
if (cache_length === CACHE_MAX) {
|
|
// kill several keys, don't want to have to do this all the time...
|
|
for (var i=0; i < 10; i++) {
|
|
var key = cache_keys.shift();
|
|
if (key) {
|
|
delete cache[key];
|
|
cache_length--;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
return function (s) {
|
|
cacheCheck();
|
|
try {
|
|
r = cache[s] = (cache[s] || rule.call(this, s));
|
|
} catch (e) {
|
|
r = cache[s] = e;
|
|
}
|
|
cache_length++;
|
|
cache_keys.push(s);
|
|
if (r instanceof $P.Exception) {
|
|
throw r;
|
|
} else {
|
|
return r;
|
|
}
|
|
};
|
|
},
|
|
|
|
// vector operators -- see below
|
|
any: function () {
|
|
var px = arguments;
|
|
return function (s) {
|
|
var r = null;
|
|
for (var i = 0; i < px.length; i++) {
|
|
if (px[i] == null) {
|
|
continue;
|
|
}
|
|
try {
|
|
r = (px[i].call(this, s));
|
|
} catch (e) {
|
|
r = null;
|
|
}
|
|
if (r) {
|
|
return r;
|
|
}
|
|
}
|
|
throw new $P.Exception(s);
|
|
};
|
|
},
|
|
each: function () {
|
|
var px = arguments;
|
|
return function (s) {
|
|
var rx = [], r = null;
|
|
for (var i = 0; i < px.length ; i++) {
|
|
if (px[i] == null) {
|
|
continue;
|
|
}
|
|
try {
|
|
r = (px[i].call(this, s));
|
|
} catch (e) {
|
|
throw new $P.Exception(s);
|
|
}
|
|
rx.push(r[0]);
|
|
s = r[1];
|
|
}
|
|
return [ rx, s];
|
|
};
|
|
},
|
|
all: function () {
|
|
var px = arguments, _ = _;
|
|
return _.each(_.optional(px));
|
|
},
|
|
|
|
// delimited operators
|
|
sequence: function (px, d, c) {
|
|
d = d || _.rtoken(/^\s*/);
|
|
c = c || null;
|
|
|
|
if (px.length === 1) {
|
|
return px[0];
|
|
}
|
|
return function (s) {
|
|
var r = null, q = null;
|
|
var rx = [];
|
|
for (var i = 0; i < px.length ; i++) {
|
|
try {
|
|
r = px[i].call(this, s);
|
|
} catch (e) {
|
|
break;
|
|
}
|
|
rx.push(r[0]);
|
|
try {
|
|
q = d.call(this, r[1]);
|
|
} catch (ex) {
|
|
q = null;
|
|
break;
|
|
}
|
|
s = q[1];
|
|
}
|
|
if (!r) {
|
|
throw new $P.Exception(s);
|
|
}
|
|
if (q) {
|
|
throw new $P.Exception(q[1]);
|
|
}
|
|
if (c) {
|
|
try {
|
|
r = c.call(this, r[1]);
|
|
} catch (ey) {
|
|
throw new $P.Exception(r[1]);
|
|
}
|
|
}
|
|
return [ rx, (r?r[1]:s) ];
|
|
};
|
|
},
|
|
|
|
//
|
|
// Composite Operators
|
|
//
|
|
|
|
between: function (d1, p, d2) {
|
|
d2 = d2 || d1;
|
|
var _fn = _.each(_.ignore(d1), p, _.ignore(d2));
|
|
return function (s) {
|
|
var rx = _fn.call(this, s);
|
|
return [[rx[0][0], r[0][2]], rx[1]];
|
|
};
|
|
},
|
|
list: function (p, d, c) {
|
|
d = d || _.rtoken(/^\s*/);
|
|
c = c || null;
|
|
return (p instanceof Array ?
|
|
_.each(_.product(p.slice(0, -1), _.ignore(d)), p.slice(-1), _.ignore(c)) :
|
|
_.each(_.many(_.each(p, _.ignore(d))), px, _.ignore(c)));
|
|
},
|
|
set: function (px, d, c) {
|
|
d = d || _.rtoken(/^\s*/);
|
|
c = c || null;
|
|
return function (s) {
|
|
// r is the current match, best the current 'best' match
|
|
// which means it parsed the most amount of input
|
|
var r = null, p = null, q = null, rx = null, best = [[], s], last = false;
|
|
// go through the rules in the given set
|
|
for (var i = 0; i < px.length ; i++) {
|
|
|
|
// last is a flag indicating whether this must be the last element
|
|
// if there is only 1 element, then it MUST be the last one
|
|
q = null;
|
|
p = null;
|
|
r = null;
|
|
last = (px.length === 1);
|
|
// first, we try simply to match the current pattern
|
|
// if not, try the next pattern
|
|
try {
|
|
r = px[i].call(this, s);
|
|
} catch (e) {
|
|
continue;
|
|
}
|
|
// since we are matching against a set of elements, the first
|
|
// thing to do is to add r[0] to matched elements
|
|
rx = [[r[0]], r[1]];
|
|
// if we matched and there is still input to parse and
|
|
// we don't already know this is the last element,
|
|
// we're going to next check for the delimiter ...
|
|
// if there's none, or if there's no input left to parse
|
|
// than this must be the last element after all ...
|
|
if (r[1].length > 0 && ! last) {
|
|
try {
|
|
q = d.call(this, r[1]);
|
|
} catch (ex) {
|
|
last = true;
|
|
}
|
|
} else {
|
|
last = true;
|
|
}
|
|
|
|
// if we parsed the delimiter and now there's no more input,
|
|
// that means we shouldn't have parsed the delimiter at all
|
|
// so don't update r and mark this as the last element ...
|
|
if (!last && q[1].length === 0) {
|
|
last = true;
|
|
}
|
|
|
|
|
|
// so, if this isn't the last element, we're going to see if
|
|
// we can get any more matches from the remaining (unmatched)
|
|
// elements ...
|
|
if (!last) {
|
|
// build a list of the remaining rules we can match against,
|
|
// i.e., all but the one we just matched against
|
|
var qx = [];
|
|
for (var j = 0; j < px.length ; j++) {
|
|
if (i !== j) {
|
|
qx.push(px[j]);
|
|
}
|
|
}
|
|
|
|
// now invoke recursively set with the remaining input
|
|
// note that we don't include the closing delimiter ...
|
|
// we'll check for that ourselves at the end
|
|
p = _.set(qx, d).call(this, q[1]);
|
|
|
|
// if we got a non-empty set as a result ...
|
|
// (otw rx already contains everything we want to match)
|
|
if (p[0].length > 0) {
|
|
// update current result, which is stored in rx ...
|
|
// basically, pick up the remaining text from p[1]
|
|
// and concat the result from p[0] so that we don't
|
|
// get endless nesting ...
|
|
rx[0] = rx[0].concat(p[0]);
|
|
rx[1] = p[1];
|
|
}
|
|
}
|
|
|
|
// at this point, rx either contains the last matched element
|
|
// or the entire matched set that starts with this element.
|
|
|
|
// now we just check to see if this variation is better than
|
|
// our best so far, in terms of how much of the input is parsed
|
|
if (rx[1].length < best[1].length) {
|
|
best = rx;
|
|
}
|
|
|
|
// if we've parsed all the input, then we're finished
|
|
if (best[1].length === 0) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// so now we've either gone through all the patterns trying them
|
|
// as the initial match; or we found one that parsed the entire
|
|
// input string ...
|
|
|
|
// if best has no matches, just return empty set ...
|
|
if (best[0].length === 0) {
|
|
return best;
|
|
}
|
|
|
|
// if a closing delimiter is provided, then we have to check it also
|
|
if (c) {
|
|
// we try this even if there is no remaining input because the pattern
|
|
// may well be optional or match empty input ...
|
|
try {
|
|
q = c.call(this, best[1]);
|
|
} catch (ey) {
|
|
throw new $P.Exception(best[1]);
|
|
}
|
|
|
|
// it parsed ... be sure to update the best match remaining input
|
|
best[1] = q[1];
|
|
}
|
|
// if we're here, either there was no closing delimiter or we parsed it
|
|
// so now we have the best match; just return it!
|
|
return best;
|
|
};
|
|
},
|
|
forward: function (gr, fname) {
|
|
return function (s) {
|
|
return gr[fname].call(this, s);
|
|
};
|
|
},
|
|
|
|
//
|
|
// Translation Operators
|
|
//
|
|
replace: function (rule, repl) {
|
|
return function (s) {
|
|
var r = rule.call(this, s);
|
|
return [repl, r[1]];
|
|
};
|
|
},
|
|
process: function (rule, fn) {
|
|
return function (s) {
|
|
var r = rule.call(this, s);
|
|
return [fn.call(this, r[0]), r[1]];
|
|
};
|
|
},
|
|
min: function (min, rule) {
|
|
return function (s) {
|
|
var rx = rule.call(this, s);
|
|
if (rx[0].length < min) {
|
|
throw new $P.Exception(s);
|
|
}
|
|
return rx;
|
|
};
|
|
}
|
|
};
|
|
|
|
|
|
// Generator Operators And Vector Operators
|
|
|
|
// Generators are operators that have a signature of F(R) => R,
|
|
// taking a given rule and returning another rule, such as
|
|
// ignore, which parses a given rule and throws away the result.
|
|
|
|
// Vector operators are those that have a signature of F(R1,R2,...) => R,
|
|
// taking a list of rules and returning a new rule, such as each.
|
|
|
|
// Generator operators are converted (via the following _generator
|
|
// function) into functions that can also take a list or array of rules
|
|
// and return an array of new rules as though the function had been
|
|
// called on each rule in turn (which is what actually happens).
|
|
|
|
// This allows generators to be used with vector operators more easily.
|
|
// Example:
|
|
// each(ignore(foo, bar)) instead of each(ignore(foo), ignore(bar))
|
|
|
|
// This also turns generators into vector operators, which allows
|
|
// constructs like:
|
|
// not(cache(foo, bar))
|
|
|
|
var _generator = function (op) {
    // Lifts a generator operator `op` (rule -> rule) so that it also accepts
    // several rules, given either as separate arguments or as one Array.
    //
    // one non-Array argument (or none): op is applied to the arguments
    //                                   unchanged and its result returned.
    // several arguments / one Array:    returns an array with op(rule) for
    //                                   every rule, in order; an empty
    //                                   Array yields an empty array.
    // The caller's array is only read, never modified.
    return function () {
        var rules = null, out = [], i;
        if (arguments.length > 1) {
            rules = Array.prototype.slice.call(arguments);
        } else if (arguments[0] instanceof Array) {
            rules = arguments[0];
        }
        if (!rules) {
            return op.apply(null, arguments);
        }
        for (i = 0; i < rules.length; i++) {
            out.push(op.call(null, rules[i]));
        }
        return out;
    };
};
|
|
|
|
// Replace every generator operator with its list-accepting version.
var gx = [ "optional", "not", "ignore", "cache" ];

var i, generatorName;
for (i = 0; i < gx.length; i += 1) {
    generatorName = gx[i];
    _[generatorName] = _generator(_[generatorName]);
}
|
|
|
|
var _vector = function (op) {
    // Lifts a vector operator `op` so its rules may be given either as
    // separate arguments or as one Array. When the first argument is an
    // Array its elements become the arguments of `op` and any further
    // arguments are not forwarded.
    return function () {
        var first = arguments[0];
        var params = (first instanceof Array) ? first : arguments;
        return op.apply(null, params);
    };
};
|
|
|
|
// Replace every vector operator with its array-accepting version.
var vx = [ "each", "any", "all" ];

var j, vectorName;
for (j = 0; j < vx.length; j += 1) {
    vectorName = vx[j];
    _[vectorName] = _vector(_[vectorName]);
}
|
|
|
|
}()); |