feat(compiler): Recover on malformed keyed reads and keyed writes (#39004)

This patch adds support for recovering well-formed (and near-complete)
ASTs for semantically malformed keyed reads and keyed writes. See the
added tests for details on the types of semantics we can now recover;
in particular, notice that some assumptions are made about the form of
a keyed read/write intended by a user. For example, in the malformed
expression `a[1 + = 2`, we assume that the user meant to write a binary
expression for the key of `a`, and assign that key the value `2`. In
particular, we now parse this as `a[1 + <empty expression>] = 2`. There
are some different interpretations that can be made here, but I think
this is reasonable.

The actual changes in the parser code are fairly minimal (a nice
surprise!); the biggest addition is a `writeContext` that marks whether
the `=` operator can serve as a recovery point after error detection.

Part of #38596

PR Close #39004
This commit is contained in:
ayazhafiz 2020-09-25 18:52:06 -05:00 committed by Joey Perrott
parent 904adb72d2
commit 3bbbf39b58
2 changed files with 159 additions and 26 deletions

View File

@ -288,10 +288,24 @@ export class IvyParser extends Parser {
simpleExpressionChecker = IvySimpleExpressionChecker; // simpleExpressionChecker = IvySimpleExpressionChecker; //
} }
/** Describes a stateful context an expression parser is in. */
enum ParseContextFlags {
None = 0,
/**
* A Writable context is one in which a value may be written to an lvalue.
* For example, after we see a property access, we may expect a write to the
* property via the "=" operator.
* prop
* ^ possible "=" after
*/
Writable = 1,
}
export class _ParseAST { export class _ParseAST {
private rparensExpected = 0; private rparensExpected = 0;
private rbracketsExpected = 0; private rbracketsExpected = 0;
private rbracesExpected = 0; private rbracesExpected = 0;
private context = ParseContextFlags.None;
// Cache of expression start and input indeces to the absolute source span they map to, used to // Cache of expression start and input indeces to the absolute source span they map to, used to
// prevent creating superfluous source spans in `sourceSpan`. // prevent creating superfluous source spans in `sourceSpan`.
@ -368,6 +382,16 @@ export class _ParseAST {
this.index++; this.index++;
} }
/**
* Executes a callback in the provided context.
*/
private withContext<T>(context: ParseContextFlags, cb: () => T): T {
this.context |= context;
const ret = cb();
this.context ^= context;
return ret;
}
consumeOptionalCharacter(code: number): boolean { consumeOptionalCharacter(code: number): boolean {
if (this.next.isCharacter(code)) { if (this.next.isCharacter(code)) {
this.advance(); this.advance();
@ -384,6 +408,12 @@ export class _ParseAST {
return this.next.isKeywordAs(); return this.next.isKeywordAs();
} }
/**
* Consumes an expected character, otherwise emits an error about the missing expected character
* and skips over the token stream until reaching a recoverable point.
*
* See `this.error` and `this.skip` for more details.
*/
expectCharacter(code: number) { expectCharacter(code: number) {
if (this.consumeOptionalCharacter(code)) return; if (this.consumeOptionalCharacter(code)) return;
this.error(`Missing expected ${String.fromCharCode(code)}`); this.error(`Missing expected ${String.fromCharCode(code)}`);
@ -631,8 +661,12 @@ export class _ParseAST {
result = this.parseAccessMemberOrMethodCall(result, true); result = this.parseAccessMemberOrMethodCall(result, true);
} else if (this.consumeOptionalCharacter(chars.$LBRACKET)) { } else if (this.consumeOptionalCharacter(chars.$LBRACKET)) {
this.withContext(ParseContextFlags.Writable, () => {
this.rbracketsExpected++; this.rbracketsExpected++;
const key = this.parsePipe(); const key = this.parsePipe();
if (key instanceof EmptyExpr) {
this.error(`Key access cannot be empty`);
}
this.rbracketsExpected--; this.rbracketsExpected--;
this.expectCharacter(chars.$RBRACKET); this.expectCharacter(chars.$RBRACKET);
if (this.consumeOptionalOperator('=')) { if (this.consumeOptionalOperator('=')) {
@ -640,9 +674,10 @@ export class _ParseAST {
result = new KeyedWrite( result = new KeyedWrite(
this.span(resultStart), this.sourceSpan(resultStart), result, key, value); this.span(resultStart), this.sourceSpan(resultStart), result, key, value);
} else { } else {
result = new KeyedRead(this.span(resultStart), this.sourceSpan(resultStart), result, key); result =
new KeyedRead(this.span(resultStart), this.sourceSpan(resultStart), result, key);
} }
});
} else if (this.consumeOptionalCharacter(chars.$LPAREN)) { } else if (this.consumeOptionalCharacter(chars.$LPAREN)) {
this.rparensExpected++; this.rparensExpected++;
const args = this.parseCallArguments(); const args = this.parseCallArguments();
@ -994,6 +1029,10 @@ export class _ParseAST {
this.consumeOptionalCharacter(chars.$SEMICOLON) || this.consumeOptionalCharacter(chars.$COMMA); this.consumeOptionalCharacter(chars.$SEMICOLON) || this.consumeOptionalCharacter(chars.$COMMA);
} }
/**
* Records an error and skips over the token stream until reaching a recoverable point. See
* `this.skip` for more details on token skipping.
*/
error(message: string, index: number|null = null) { error(message: string, index: number|null = null) {
this.errors.push(new ParserError(message, this.input, this.locationText(index), this.location)); this.errors.push(new ParserError(message, this.input, this.locationText(index), this.location));
this.skip(); this.skip();
@ -1005,25 +1044,32 @@ export class _ParseAST {
`at the end of the expression`; `at the end of the expression`;
} }
// Error recovery should skip tokens until it encounters a recovery point. skip() treats /**
// the end of input and a ';' as unconditionally a recovery point. It also treats ')', * Error recovery should skip tokens until it encounters a recovery point. skip() treats
// '}' and ']' as conditional recovery points if one of calling productions is expecting * the end of input and a ';' as unconditionally a recovery point. It also treats ')',
// one of these symbols. This allows skip() to recover from errors such as '(a.) + 1' allowing * '}' and ']' as conditional recovery points if one of calling productions is expecting
// more of the AST to be retained (it doesn't skip any tokens as the ')' is retained because * one of these symbols. This allows skip() to recover from errors such as '(a.) + 1' allowing
// of the '(' begins an '(' <expr> ')' production). The recovery points of grouping symbols * more of the AST to be retained (it doesn't skip any tokens as the ')' is retained because
// must be conditional as they must be skipped if none of the calling productions are not * of the '(' begins an '(' <expr> ')' production). The recovery points of grouping symbols
// expecting the closing token else we will never make progress in the case of an * must be conditional as they must be skipped if none of the calling productions are not
// extraneous group closing symbol (such as a stray ')'). This is not the case for ';' because * expecting the closing token else we will never make progress in the case of an
// parseChain() is always the root production and it expects a ';'. * extraneous group closing symbol (such as a stray ')'). This is not the case for ';' because
* parseChain() is always the root production and it expects a ';'.
// If a production expects one of these token it increments the corresponding nesting count, *
// and then decrements it just prior to checking if the token is in the input. * Furthermore, the presence of a stateful context can add more recovery points.
* - in a `Writable` context, we are able to recover after seeing the `=` operator, which
* signals the presence of an independent rvalue expression following the `=` operator.
*
* If a production expects one of these token it increments the corresponding nesting count,
* and then decrements it just prior to checking if the token is in the input.
*/
private skip() { private skip() {
let n = this.next; let n = this.next;
while (this.index < this.tokens.length && !n.isCharacter(chars.$SEMICOLON) && while (this.index < this.tokens.length && !n.isCharacter(chars.$SEMICOLON) &&
(this.rparensExpected <= 0 || !n.isCharacter(chars.$RPAREN)) && (this.rparensExpected <= 0 || !n.isCharacter(chars.$RPAREN)) &&
(this.rbracesExpected <= 0 || !n.isCharacter(chars.$RBRACE)) && (this.rbracesExpected <= 0 || !n.isCharacter(chars.$RBRACE)) &&
(this.rbracketsExpected <= 0 || !n.isCharacter(chars.$RBRACKET))) { (this.rbracketsExpected <= 0 || !n.isCharacter(chars.$RBRACKET)) &&
(!(this.context & ParseContextFlags.Writable) || !n.isOperator('='))) {
if (this.next.isError()) { if (this.next.isError()) {
this.errors.push( this.errors.push(
new ParserError(this.next.toString()!, this.input, this.locationText(), this.location)); new ParserError(this.next.toString()!, this.input, this.locationText(), this.location));

View File

@ -154,6 +154,84 @@ describe('parser', () => {
}); });
}); });
describe('keyed read', () => {
it('should parse keyed reads', () => {
checkAction('a["a"]');
checkAction('this.a["a"]', 'a["a"]');
checkAction('a.a["a"]');
});
describe('malformed keyed reads', () => {
it('should recover on missing keys', () => {
checkActionWithError('a[]', 'a[]', 'Key access cannot be empty');
});
it('should recover on incomplete expression keys', () => {
checkActionWithError('a[1 + ]', 'a[1 + ]', 'Unexpected token ]');
});
it('should recover on unterminated keys', () => {
checkActionWithError(
'a[1 + 2', 'a[1 + 2]', 'Missing expected ] at the end of the expression');
});
it('should recover on incomplete and unterminated keys', () => {
checkActionWithError(
'a[1 + ', 'a[1 + ]', 'Missing expected ] at the end of the expression');
});
});
});
describe('keyed write', () => {
it('should parse keyed writes', () => {
checkAction('a["a"] = 1 + 2');
checkAction('this.a["a"] = 1 + 2', 'a["a"] = 1 + 2');
checkAction('a.a["a"] = 1 + 2');
});
describe('malformed keyed writes', () => {
it('should recover on empty rvalues', () => {
checkActionWithError('a["a"] = ', 'a["a"] = ', 'Unexpected end of expression');
});
it('should recover on incomplete rvalues', () => {
checkActionWithError('a["a"] = 1 + ', 'a["a"] = 1 + ', 'Unexpected end of expression');
});
it('should recover on missing keys', () => {
checkActionWithError('a[] = 1', 'a[] = 1', 'Key access cannot be empty');
});
it('should recover on incomplete expression keys', () => {
checkActionWithError('a[1 + ] = 1', 'a[1 + ] = 1', 'Unexpected token ]');
});
it('should recover on unterminated keys', () => {
checkActionWithError('a[1 + 2 = 1', 'a[1 + 2] = 1', 'Missing expected ]');
});
it('should recover on incomplete and unterminated keys', () => {
const ast = parseAction('a[1 + = 1');
expect(unparse(ast)).toEqual('a[1 + ] = 1');
validate(ast);
const errors = ast.errors.map(e => e.message);
expect(errors.length).toBe(2);
expect(errors[0]).toContain('Unexpected token =');
expect(errors[1]).toContain('Missing expected ]');
});
it('should error on writes after a keyed write', () => {
const ast = parseAction('a[1] = 1 = 2');
expect(unparse(ast)).toEqual('a[1] = 1');
validate(ast);
expect(ast.errors.length).toBe(1);
expect(ast.errors[0].message).toContain('Unexpected token \'=\'');
});
});
});
describe('conditional', () => { describe('conditional', () => {
it('should parse ternary/conditional expressions', () => { it('should parse ternary/conditional expressions', () => {
checkAction('7 == 3 + 4 ? 10 : 20'); checkAction('7 == 3 + 4 ? 10 : 20');
@ -926,3 +1004,12 @@ function expectActionError(text: string, message: string) {
function expectBindingError(text: string, message: string) { function expectBindingError(text: string, message: string) {
expectError(validate(parseBinding(text)), message); expectError(validate(parseBinding(text)), message);
} }
/**
* Check that an malformed action parses to a recovered AST while emitting an
* error.
*/
function checkActionWithError(text: string, expected: string, error: string) {
checkAction(text, expected);
expectActionError(text, error);
}