2023-01-19 19:09:13 +01:00
|
|
|
// Filter `xs` down to the rows that satisfy `query`.
//
// query:  query text; handed to `parse` together with `config`.
// xs:     array of rows to filter.
// config: options forwarded unchanged to both `parse` and `compile`.
//
// Returns `xs` itself (not a copy) when the query is the empty string or when
// there are no rows; otherwise returns the new array produced by `xs.filter`.
function select(query, xs, config) {
  // Naive optimizations: nothing to parse, or nothing to filter. An array
  // literal is never identical to another array, so emptiness has to be
  // detected through `length` rather than `xs === []`.
  if (query === '' || xs.length === 0) {
    return xs;
  }
  const predicate = compile(parse(query, config), config);
  return xs.filter(predicate);
}
|
|
|
|
|
2023-01-19 19:09:13 +01:00
|
|
|
// Compare a candidate value against an expected string, honouring
// `config.caseSensitive`. In the case-insensitive mode only strings can match;
// other values (numbers, null, ...) are reported as non-matching instead of
// raising a TypeError on `.toLowerCase()`.
function equalsString(x, expected, config) {
  if (config.caseSensitive) {
    return x === expected;
  }
  return typeof x === 'string' && x.toLowerCase() === expected.toLowerCase();
}

// Resolve the right-hand side of a date selection to a Date. The literal
// 'yesterday' means "now minus one day", evaluated at call time; any other
// value is handed to the Date constructor (e.g. MM/DD/YYYY).
function resolveDateThreshold(val) {
  if (val === 'yesterday') {
    const t = new Date();
    t.setDate(t.getDate() - 1);
    return t;
  }
  return new Date(val);
}

// Turn an AST node into a predicate function.
//
// ast:    a node whose `type` is one of CONJUNCTION, DATE_SELECTION,
//         COMPARE_SELECTION, SELECTION, MATCH_ALL, GROUPING, STRING or REGEX.
// config: reads `caseSensitive` (string comparisons) and `dateKey` (the row
//         property compared by date selections).
//
// Row-level nodes yield `row => boolean`; STRING and REGEX nodes yield
// `value => boolean` and are applied by SELECTION to `row[ast.key]`.
// Returns undefined for a node type (or conjunction joint / date key) that is
// not recognised.
function compile(ast, config) {
  if (ast.type === 'CONJUNCTION') {
    // Both operands must be compiled with the caller's config.
    const lhs = compile(ast.lhs, config);
    const rhs = compile(ast.rhs, config);
    if (ast.joint === 'AND') {
      return function(x) {
        return lhs(x) && rhs(x);
      };
    }
    if (ast.joint === 'OR') {
      return function(x) {
        return lhs(x) || rhs(x);
      };
    }
  }

  if (ast.type === 'DATE_SELECTION') {
    if (ast.key === 'before' || ast.key === 'after') {
      const isBefore = ast.key === 'before';
      return function(row) {
        const t = resolveDateThreshold(ast.val);
        const hit = isBefore ? row[config.dateKey] < t : row[config.dateKey] > t;
        // `negate` is honoured when the node carries it; nodes without the
        // flag behave exactly as a plain before/after test.
        return ast.negate ? !hit : hit;
      };
    }
  }

  if (ast.type === 'COMPARE_SELECTION') {
    // `ast.val` is a plain number here, so there is nothing to compile.
    let compare = null;
    switch (ast.operator) {
      case 'EQ': compare = (x, y) => x === y; break;
      case 'LT': compare = (x, y) => x < y; break;
      case 'GT': compare = (x, y) => x > y; break;
      case 'LTE': compare = (x, y) => x <= y; break;
      case 'GTE': compare = (x, y) => x >= y; break;
    }
    return function(row) {
      return ast.negate ? !compare(row[ast.key], ast.val) : compare(row[ast.key], ast.val);
    };
  }

  if (ast.type === 'SELECTION') {
    const f = compile(ast.val, config);
    return function(row) {
      return ast.negate ? !f(row[ast.key]) : f(row[ast.key]);
    };
  }

  if (ast.type === 'MATCH_ALL') {
    // A selection without a column matches when any value of the row matches.
    if (ast.matchType === 'STRING') {
      return function(row) {
        return Object.values(row).some(x => equalsString(x, ast.val, config));
      };
    }
    if (ast.matchType === 'REGEX') {
      return function(row) {
        return Object.values(row).some(x => ast.val.test(x));
      };
    }
  }

  if (ast.type === 'GROUPING') {
    // Parenthesised content compiles like any other node, with the same config.
    return compile(ast.content, config);
  }

  if (ast.type === 'STRING') {
    return function(x) {
      return equalsString(x, ast.val, config);
    };
  }

  if (ast.type === 'REGEX') {
    return function(x) {
      return ast.val.test(x);
    };
  }
}
|
|
|
|
|
|
|
|
// A "selection" without a "$column:" prefix should fuzzy-search all columns.
|
|
|
|
//
|
|
|
|
// conjunction -> selection ( ( "AND" | "OR" )? selection )* ;
|
|
|
|
// selection -> "-"? COLUMN ":" ( regex | string ) | regex ;
|
|
|
|
// regex -> [_-a-zA-Z0-9] | "/" [ _-a-zA-Z0-9] "/" | string ;
|
|
|
|
// string -> "\"" [ _-a-zA-Z0-9] "\"" ;
|
|
|
|
|
|
|
|
// Whatever characters are valid for a JS regex.
|
|
|
|
// Single-character class used by `tokenize` to decide whether a character may
// begin or extend a bare ATOM token. `tokenize` checks for '-' and for digits
// before it consults this regex, so an atom never starts with either of them
// even though both may appear later inside an atom.
const ATOM_REGEX = /[-_.\[\]a-zA-Z0-9*+^$]/;
|
|
|
|
|
|
|
|
// Split the query string `x` into a flat list of `[type, value]` tokens.
//
// Token types: WHITESPACE, NEGATE, COLON, LPAREN, RPAREN (value null);
// NUMBER (a JS number); ATOM, REGEX, STRING (a string); COMPARE (one of
// 'EQ', 'LT', 'LTE', 'GT', 'GTE').
//
// Characters that start none of these tokens are skipped silently.
// Throws a "Tokenize Error: ..." string when a /regex/ or "string" literal is
// still open at the end of the input.
function tokenize(x) {
  const result = [];
  const isDigit = (c) => /[0-9]/.test(c);
  let i = 0;

  while (i < x.length) {
    // A run of spaces collapses into a single WHITESPACE token.
    if (x[i] === ' ') {
      i += 1;
      while (i < x.length && x[i] === ' ') {
        i += 1;
      }
      result.push(['WHITESPACE', null]);
      continue;
    }

    if (x[i] === '-') {
      result.push(['NEGATE', null]);
      i += 1;
      continue;
    }

    // Tokenize numbers (i.e. integers, floats).
    if (isDigit(x[i])) {
      let curr = x[i];
      i += 1;
      while (i < x.length && isDigit(x[i])) {
        curr += x[i];
        i += 1;
      }
      // Only treat '.' as a decimal point when a digit follows it; otherwise
      // the '.' is left for the ATOM branch.
      if (i + 1 < x.length && x[i] === '.' && isDigit(x[i + 1])) {
        curr += x[i];
        i += 1;
        while (i < x.length && isDigit(x[i])) {
          curr += x[i];
          i += 1;
        }
      }
      result.push(['NUMBER', parseFloat(curr)]);
      continue;
    }

    if (ATOM_REGEX.test(x[i])) {
      let curr = x[i];
      i += 1;
      while (i < x.length && ATOM_REGEX.test(x[i])) {
        curr += x[i];
        i += 1;
      }
      result.push(['ATOM', curr]);
      continue;
    }

    if (x[i] === '=') {
      result.push(['COMPARE', 'EQ']);
      i += 1;
      continue;
    }

    // '<' and '>' optionally combine with a directly following '='. The
    // look-ahead bound is `i + 1` (the next character).
    if (x[i] === '<' || x[i] === '>') {
      const strict = x[i] === '<' ? 'LT' : 'GT';
      if (i + 1 < x.length && x[i + 1] === '=') {
        result.push(['COMPARE', strict + 'E']);
        i += 2;
      } else {
        result.push(['COMPARE', strict]);
        i += 1;
      }
      continue;
    }

    if (x[i] === ':') {
      result.push(['COLON', null]);
      i += 1;
      continue;
    }

    if (x[i] === '(') {
      result.push(['LPAREN', null]);
      i += 1;
      continue;
    }

    if (x[i] === ')') {
      result.push(['RPAREN', null]);
      i += 1;
      continue;
    }

    // /regex/ literal: everything up to the next '/'.
    if (x[i] === '/') {
      const start = i;
      let curr = '';
      i += 1;
      while (i < x.length && x[i] !== '/') {
        curr += x[i];
        i += 1;
      }
      if (i >= x.length) {
        throw `Tokenize Error: EOL while attempting to tokenize the regex beginning at column: ${start}`;
      }
      result.push(['REGEX', curr]);
      i += 1; // consume the closing '/'
      continue;
    }

    // "string" literal; \" inside it stands for a literal double quote.
    if (x[i] === '"') {
      const start = i;
      let curr = '';
      i += 1;
      while (i < x.length && x[i] !== '"') {
        if (x[i] === '\\' && x[i + 1] === '"') {
          curr += '"';
          i += 2;
        } else {
          curr += x[i];
          i += 1;
        }
      }
      if (i >= x.length) {
        throw `Tokenize Error: EOL while attempting to tokenize the string starting at column: ${start}`;
      }
      result.push(['STRING', curr]);
      i += 1; // consume the closing '"'
      continue;
    }

    // Unrecognised character: skip it.
    i += 1;
  }

  return result;
}
|
|
|
|
|
|
|
|
// Require the current token to satisfy `f` and step past it.
//
// f:           predicate called as `f(type, val)` on the current token.
// expectation: human-readable description used in the error message.
// p:           parser state (`i` = current index, `tokens` = token list);
//              `p.i` is advanced by one on success and left alone on failure.
//
// Throws a "Parse Error: ..." string when the token does not satisfy `f`, and
// also when the input is already exhausted (instead of failing with a
// TypeError while destructuring a missing token).
function expect(f, expectation, p) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: expected ${expectation}, but reached the end of the input; ${JSON.stringify(p)}`;
  }
  const [type, val] = p.tokens[p.i];
  if (!f(type, val)) {
    throw `Parse Error: expected ${expectation}, but got ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
  }
  p.i += 1;
}
|
|
|
|
|
|
|
|
// Report whether the current token satisfies `f`, without consuming it.
//
// f: predicate called as `f(type, val)` on the current token.
// p: parser state (`i` = current index, `tokens` = token list); not modified.
//
// Returns true or false. When the input is exhausted there is no token to
// test, so the answer is false rather than a TypeError.
function matches(f, p) {
  if (p.i >= p.tokens.length) {
    return false;
  }
  const [type, val] = p.tokens[p.i];
  return f(type, val) ? true : false;
}
|
|
|
|
|
|
|
|
// Consume the current token if it satisfies `f` and return its value.
//
// f:           predicate called as `f(type, val)` on the current token.
// expectation: human-readable description used in the error message.
// p:           parser state (`i` = current index, `tokens` = token list);
//              `p.i` is advanced by one on success.
//
// Returns the token's value. Throws a "Parse Error: ..." string when the
// token does not satisfy `f`, and also when the input is already exhausted
// (instead of failing with a TypeError while destructuring a missing token).
function match(f, expectation, p) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: expected ${expectation}, but reached the end of the input; ${JSON.stringify(p)}`;
  }
  const [type, val] = p.tokens[p.i];
  if (f(type, val)) {
    p.i += 1;
    return val;
  }
  throw `Parse Error: expected ${expectation}, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
}
|
|
|
|
|
|
|
|
// Advance `p.i` past any consecutive WHITESPACE tokens at the current
// position; stops at the first other token or at the end of the token list.
function skipWhitespace(p) {
  const isWhitespace = (type, _) => type === 'WHITESPACE';
  for (; p.i < p.tokens.length; p.i += 1) {
    if (!matches(isWhitespace, p)) {
      break;
    }
  }
}
|
|
|
|
|
2023-01-21 00:28:55 +01:00
|
|
|
// Look `n` tokens ahead of the current position without consuming anything.
// Returns that token's type, or null when the position lies past the last
// token.
function peekType(n, p) {
  const target = p.i + n;
  return target < p.tokens.length ? p.tokens[target][0] : null;
}
|
|
|
|
|
2023-01-12 05:55:21 +01:00
|
|
|
// Build a fresh parser state for `tokens`: the cursor `i` starts at the first
// token and `tokens` is stored as given (not copied).
function parser(tokens) {
  const state = { i: 0 };
  state.tokens = tokens;
  return state;
}
|
|
|
|
|
2023-01-20 00:19:49 +01:00
|
|
|
// Parse the query string `x` into an AST: tokenize it, wrap the tokens in a
// parser state and hand that state, together with `config`, to `conjunction`.
function parse(x, config) {
  return conjunction(parser(tokenize(x)), config);
}
|
|
|
|
|
2023-01-20 00:19:49 +01:00
|
|
|
// Parse one selection followed by an optional chain of further selections.
//
// After the first selection, parsing stops (returning that selection alone)
// at a closing parenthesis or at the end of the tokens. Otherwise an explicit
// AND / OR atom picks the joint, a missing one defaults to AND, and the rest
// of the input is parsed recursively as the right-hand side.
//
// Returns either the lone selection node or
// `{ type: 'CONJUNCTION', joint, lhs, rhs }`.
function conjunction(p, config) {
  skipWhitespace(p);
  const lhs = selection(p, config);
  skipWhitespace(p);

  // TODO(wpcarro): Consider re-architecting the parser to avoid smells like
  // this.
  const atGroupEnd = peekType(0, p) === 'RPAREN';
  const atInputEnd = p.i >= p.tokens.length;
  if (atGroupEnd || atInputEnd) {
    return lhs;
  }

  const isJoint = (type, val) => type === 'ATOM' && (val === 'AND' || val === 'OR');
  let joint = 'AND';
  if (matches(isJoint, p)) {
    joint = p.tokens[p.i][1];
    p.i += 1;
  }

  skipWhitespace(p);
  const rhs = conjunction(p, config);
  return { type: 'CONJUNCTION', joint, lhs, rhs };
}
|
|
|
|
|
2023-01-20 00:19:49 +01:00
|
|
|
// Parse a single selection at the current position.
//
// Recognised shapes (each optionally prefixed with "-" to negate it):
//   column:value   -> { type: 'SELECTION', negate, key, val }
//   before:date    -> { type: 'DATE_SELECTION', negate, key, val }
//   after:date     -> { type: 'DATE_SELECTION', negate, key, val }
//   column<number  -> { type: 'COMPARE_SELECTION', operator, negate, key, val }
// Anything else is delegated to `matchAll` without consuming a token.
//
// The `negate` flag is recorded on every node type, date selections
// included, so that a leading "-" is not silently lost from the AST.
function selection(p, config) {
  // An optional NEGATE shifts the label and its separator by one token.
  const offset = peekType(0, p) === 'NEGATE' ? 1 : 0;
  const hasLabel = peekType(offset, p) === 'ATOM';
  const separator = hasLabel ? peekType(offset + 1, p) : null;

  if (separator !== 'COLON' && separator !== 'COMPARE') {
    return matchAll(p, config);
  }

  const negate = offset === 1;
  p.i += offset;
  const key = match((type, _) => type === 'ATOM', 'a column label', p);

  // column<value OR -column<value
  if (separator === 'COMPARE') {
    const operator = match((type, _) => type === 'COMPARE', 'a comparison operator (i.e. "<", ">", "<=", ">=")', p);
    const val = match((type, _) => type === 'NUMBER', 'a number', p);
    return {
      type: 'COMPARE_SELECTION',
      operator,
      negate,
      key,
      val,
    };
  }

  // column:value OR -column:value
  expect((type, _) => type === 'COLON', 'a colon', p);

  if (key === 'before' || key === 'after') {
    const val = date(p);
    return {
      type: 'DATE_SELECTION',
      negate,
      key,
      val,
    };
  }

  const val = value(p, config);
  return {
    type: 'SELECTION',
    negate,
    key,
    val,
  };
}
|
|
|
|
|
2023-01-19 19:09:13 +01:00
|
|
|
// Parse a selection that has no "column:" prefix; it is matched against every
// column of a row.
//
// Accepted tokens at the current position:
//   ATOM   -> MATCH_ALL node; a regex when `config.preferRegex` is set,
//             otherwise a string.
//   STRING -> MATCH_ALL string node.
//   REGEX  -> MATCH_ALL regex node.
//   LPAREN -> GROUPING node wrapping a nested conjunction; the closing
//             parenthesis is required.
// Regexes get the "i" flag unless `config.caseSensitive` is set.
//
// Throws a "Parse Error: ..." string for any other token, and also when the
// input is already exhausted (instead of failing with a TypeError while
// destructuring a missing token).
function matchAll(p, config) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: Expected a regular expression or a string, but reached the end of the input; ${JSON.stringify(p)}`;
  }

  const [type, val] = p.tokens[p.i];
  const toRegex = (source) => (config.caseSensitive ? new RegExp(source) : new RegExp(source, "i"));

  // Cast atoms into strings or regexes depending on the current config.
  if (type === 'ATOM') {
    p.i += 1;
    if (config.preferRegex) {
      return { type: 'MATCH_ALL', matchType: 'REGEX', val: toRegex(val) };
    }
    return { type: 'MATCH_ALL', matchType: 'STRING', val };
  }

  if (type === 'STRING') {
    p.i += 1;
    return { type: 'MATCH_ALL', matchType: 'STRING', val };
  }

  if (type === 'REGEX') {
    p.i += 1;
    return { type: 'MATCH_ALL', matchType: 'REGEX', val: toRegex(val) };
  }

  if (type === 'LPAREN') {
    p.i += 1;
    const content = conjunction(p, config);
    expect((type, _) => type === 'RPAREN', 'a closing parenthesis', p);
    return {
      type: 'GROUPING',
      content,
    };
  }

  throw `Parse Error: Expected a regular expression or a string, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
}
|
|
|
|
|
2023-01-19 19:09:13 +01:00
|
|
|
// Parse the value that follows "column:".
//
// Accepted tokens at the current position:
//   ATOM   -> REGEX node when `config.preferRegex` is set, otherwise a
//             STRING node.
//   STRING -> STRING node.
//   REGEX  -> REGEX node.
// Regexes get the "i" flag unless `config.caseSensitive` is set.
//
// Throws a "Parse Error: ..." string for any other token, and also when the
// input is already exhausted (instead of failing with a TypeError while
// destructuring a missing token).
function value(p, config) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: Expected a regular expression or a string, but reached the end of the input; ${JSON.stringify(p)}`;
  }

  const [type, val] = p.tokens[p.i];
  const toRegex = (source) => (config.caseSensitive ? new RegExp(source) : new RegExp(source, "i"));

  // Cast atoms into strings or regexes depending on the current config.
  if (type === 'ATOM') {
    p.i += 1;
    if (config.preferRegex) {
      return { type: 'REGEX', val: toRegex(val) };
    }
    return { type: 'STRING', val };
  }

  if (type === 'STRING') {
    p.i += 1;
    return { type, val };
  }

  if (type === 'REGEX') {
    p.i += 1;
    return { type, val: toRegex(val) };
  }

  throw `Parse Error: Expected a regular expression or a string, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
}
|
2023-01-14 02:35:43 +01:00
|
|
|
|
|
|
|
// Consume the token that follows "before:" / "after:" and return its value
// unchanged. The token's type is not checked.
//
// Throws a "Parse Error: ..." string when the input is already exhausted
// (instead of failing with a TypeError while reading a missing token).
function date(p) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: Expected a date, but reached the end of the input; ${JSON.stringify(p)}`;
  }
  const val = p.tokens[p.i][1];
  p.i += 1;
  return val;
}
|