feat(compiler): Recover on malformed keyed reads and keyed writes (#39004)
This patch adds support for recovering well-formed (and near-complete) ASTs for semantically malformed keyed reads and keyed writes. See the added tests for details on the types of semantics we can now recover; in particular, notice that some assumptions are made about the form of a keyed read/write intended by a user. For example, in the malformed expression `a[1 + = 2`, we assume that the user meant to write a binary expression for the key of `a`, and assign that key the value `2`. In particular, we now parse this as `a[1 + <empty expression>] = 2`. There are some different interpretations that can be made here, but I think this is reasonable. The actual changes in the parser code are fairly minimal (a nice surprise!); the biggest addition is a `writeContext` that marks whether the `=` operator can serve as a recovery point after error detection. Part of #38596 PR Close #39004
This commit is contained in:
parent
904adb72d2
commit
3bbbf39b58
|
@ -288,10 +288,24 @@ export class IvyParser extends Parser {
|
|||
simpleExpressionChecker = IvySimpleExpressionChecker; //
|
||||
}
|
||||
|
||||
/** Describes a stateful context an expression parser is in. */
|
||||
enum ParseContextFlags {
|
||||
None = 0,
|
||||
/**
|
||||
* A Writable context is one in which a value may be written to an lvalue.
|
||||
* For example, after we see a property access, we may expect a write to the
|
||||
* property via the "=" operator.
|
||||
* prop
|
||||
* ^ possible "=" after
|
||||
*/
|
||||
Writable = 1,
|
||||
}
|
||||
|
||||
export class _ParseAST {
|
||||
private rparensExpected = 0;
|
||||
private rbracketsExpected = 0;
|
||||
private rbracesExpected = 0;
|
||||
private context = ParseContextFlags.None;
|
||||
|
||||
// Cache of expression start and input indeces to the absolute source span they map to, used to
|
||||
// prevent creating superfluous source spans in `sourceSpan`.
|
||||
|
@ -368,6 +382,16 @@ export class _ParseAST {
|
|||
this.index++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a callback in the provided context.
|
||||
*/
|
||||
private withContext<T>(context: ParseContextFlags, cb: () => T): T {
|
||||
this.context |= context;
|
||||
const ret = cb();
|
||||
this.context ^= context;
|
||||
return ret;
|
||||
}
|
||||
|
||||
consumeOptionalCharacter(code: number): boolean {
|
||||
if (this.next.isCharacter(code)) {
|
||||
this.advance();
|
||||
|
@ -384,6 +408,12 @@ export class _ParseAST {
|
|||
return this.next.isKeywordAs();
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes an expected character, otherwise emits an error about the missing expected character
|
||||
* and skips over the token stream until reaching a recoverable point.
|
||||
*
|
||||
* See `this.error` and `this.skip` for more details.
|
||||
*/
|
||||
expectCharacter(code: number) {
|
||||
if (this.consumeOptionalCharacter(code)) return;
|
||||
this.error(`Missing expected ${String.fromCharCode(code)}`);
|
||||
|
@ -631,18 +661,23 @@ export class _ParseAST {
|
|||
result = this.parseAccessMemberOrMethodCall(result, true);
|
||||
|
||||
} else if (this.consumeOptionalCharacter(chars.$LBRACKET)) {
|
||||
this.rbracketsExpected++;
|
||||
const key = this.parsePipe();
|
||||
this.rbracketsExpected--;
|
||||
this.expectCharacter(chars.$RBRACKET);
|
||||
if (this.consumeOptionalOperator('=')) {
|
||||
const value = this.parseConditional();
|
||||
result = new KeyedWrite(
|
||||
this.span(resultStart), this.sourceSpan(resultStart), result, key, value);
|
||||
} else {
|
||||
result = new KeyedRead(this.span(resultStart), this.sourceSpan(resultStart), result, key);
|
||||
}
|
||||
|
||||
this.withContext(ParseContextFlags.Writable, () => {
|
||||
this.rbracketsExpected++;
|
||||
const key = this.parsePipe();
|
||||
if (key instanceof EmptyExpr) {
|
||||
this.error(`Key access cannot be empty`);
|
||||
}
|
||||
this.rbracketsExpected--;
|
||||
this.expectCharacter(chars.$RBRACKET);
|
||||
if (this.consumeOptionalOperator('=')) {
|
||||
const value = this.parseConditional();
|
||||
result = new KeyedWrite(
|
||||
this.span(resultStart), this.sourceSpan(resultStart), result, key, value);
|
||||
} else {
|
||||
result =
|
||||
new KeyedRead(this.span(resultStart), this.sourceSpan(resultStart), result, key);
|
||||
}
|
||||
});
|
||||
} else if (this.consumeOptionalCharacter(chars.$LPAREN)) {
|
||||
this.rparensExpected++;
|
||||
const args = this.parseCallArguments();
|
||||
|
@ -994,6 +1029,10 @@ export class _ParseAST {
|
|||
this.consumeOptionalCharacter(chars.$SEMICOLON) || this.consumeOptionalCharacter(chars.$COMMA);
|
||||
}
|
||||
|
||||
/**
|
||||
* Records an error and skips over the token stream until reaching a recoverable point. See
|
||||
* `this.skip` for more details on token skipping.
|
||||
*/
|
||||
error(message: string, index: number|null = null) {
|
||||
this.errors.push(new ParserError(message, this.input, this.locationText(index), this.location));
|
||||
this.skip();
|
||||
|
@ -1005,25 +1044,32 @@ export class _ParseAST {
|
|||
`at the end of the expression`;
|
||||
}
|
||||
|
||||
// Error recovery should skip tokens until it encounters a recovery point. skip() treats
|
||||
// the end of input and a ';' as unconditionally a recovery point. It also treats ')',
|
||||
// '}' and ']' as conditional recovery points if one of calling productions is expecting
|
||||
// one of these symbols. This allows skip() to recover from errors such as '(a.) + 1' allowing
|
||||
// more of the AST to be retained (it doesn't skip any tokens as the ')' is retained because
|
||||
// of the '(' begins an '(' <expr> ')' production). The recovery points of grouping symbols
|
||||
// must be conditional as they must be skipped if none of the calling productions are not
|
||||
// expecting the closing token else we will never make progress in the case of an
|
||||
// extraneous group closing symbol (such as a stray ')'). This is not the case for ';' because
|
||||
// parseChain() is always the root production and it expects a ';'.
|
||||
|
||||
// If a production expects one of these token it increments the corresponding nesting count,
|
||||
// and then decrements it just prior to checking if the token is in the input.
|
||||
/**
|
||||
* Error recovery should skip tokens until it encounters a recovery point. skip() treats
|
||||
* the end of input and a ';' as unconditionally a recovery point. It also treats ')',
|
||||
* '}' and ']' as conditional recovery points if one of calling productions is expecting
|
||||
* one of these symbols. This allows skip() to recover from errors such as '(a.) + 1' allowing
|
||||
* more of the AST to be retained (it doesn't skip any tokens as the ')' is retained because
|
||||
* of the '(' begins an '(' <expr> ')' production). The recovery points of grouping symbols
|
||||
* must be conditional as they must be skipped if none of the calling productions are not
|
||||
* expecting the closing token else we will never make progress in the case of an
|
||||
* extraneous group closing symbol (such as a stray ')'). This is not the case for ';' because
|
||||
* parseChain() is always the root production and it expects a ';'.
|
||||
*
|
||||
* Furthermore, the presence of a stateful context can add more recovery points.
|
||||
* - in a `Writable` context, we are able to recover after seeing the `=` operator, which
|
||||
* signals the presence of an independent rvalue expression following the `=` operator.
|
||||
*
|
||||
* If a production expects one of these token it increments the corresponding nesting count,
|
||||
* and then decrements it just prior to checking if the token is in the input.
|
||||
*/
|
||||
private skip() {
|
||||
let n = this.next;
|
||||
while (this.index < this.tokens.length && !n.isCharacter(chars.$SEMICOLON) &&
|
||||
(this.rparensExpected <= 0 || !n.isCharacter(chars.$RPAREN)) &&
|
||||
(this.rbracesExpected <= 0 || !n.isCharacter(chars.$RBRACE)) &&
|
||||
(this.rbracketsExpected <= 0 || !n.isCharacter(chars.$RBRACKET))) {
|
||||
(this.rbracketsExpected <= 0 || !n.isCharacter(chars.$RBRACKET)) &&
|
||||
(!(this.context & ParseContextFlags.Writable) || !n.isOperator('='))) {
|
||||
if (this.next.isError()) {
|
||||
this.errors.push(
|
||||
new ParserError(this.next.toString()!, this.input, this.locationText(), this.location));
|
||||
|
|
|
@ -154,6 +154,84 @@ describe('parser', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('keyed read', () => {
|
||||
it('should parse keyed reads', () => {
|
||||
checkAction('a["a"]');
|
||||
checkAction('this.a["a"]', 'a["a"]');
|
||||
checkAction('a.a["a"]');
|
||||
});
|
||||
|
||||
describe('malformed keyed reads', () => {
|
||||
it('should recover on missing keys', () => {
|
||||
checkActionWithError('a[]', 'a[]', 'Key access cannot be empty');
|
||||
});
|
||||
|
||||
it('should recover on incomplete expression keys', () => {
|
||||
checkActionWithError('a[1 + ]', 'a[1 + ]', 'Unexpected token ]');
|
||||
});
|
||||
|
||||
it('should recover on unterminated keys', () => {
|
||||
checkActionWithError(
|
||||
'a[1 + 2', 'a[1 + 2]', 'Missing expected ] at the end of the expression');
|
||||
});
|
||||
|
||||
it('should recover on incomplete and unterminated keys', () => {
|
||||
checkActionWithError(
|
||||
'a[1 + ', 'a[1 + ]', 'Missing expected ] at the end of the expression');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('keyed write', () => {
|
||||
it('should parse keyed writes', () => {
|
||||
checkAction('a["a"] = 1 + 2');
|
||||
checkAction('this.a["a"] = 1 + 2', 'a["a"] = 1 + 2');
|
||||
checkAction('a.a["a"] = 1 + 2');
|
||||
});
|
||||
|
||||
describe('malformed keyed writes', () => {
|
||||
it('should recover on empty rvalues', () => {
|
||||
checkActionWithError('a["a"] = ', 'a["a"] = ', 'Unexpected end of expression');
|
||||
});
|
||||
|
||||
it('should recover on incomplete rvalues', () => {
|
||||
checkActionWithError('a["a"] = 1 + ', 'a["a"] = 1 + ', 'Unexpected end of expression');
|
||||
});
|
||||
|
||||
it('should recover on missing keys', () => {
|
||||
checkActionWithError('a[] = 1', 'a[] = 1', 'Key access cannot be empty');
|
||||
});
|
||||
|
||||
it('should recover on incomplete expression keys', () => {
|
||||
checkActionWithError('a[1 + ] = 1', 'a[1 + ] = 1', 'Unexpected token ]');
|
||||
});
|
||||
|
||||
it('should recover on unterminated keys', () => {
|
||||
checkActionWithError('a[1 + 2 = 1', 'a[1 + 2] = 1', 'Missing expected ]');
|
||||
});
|
||||
|
||||
it('should recover on incomplete and unterminated keys', () => {
|
||||
const ast = parseAction('a[1 + = 1');
|
||||
expect(unparse(ast)).toEqual('a[1 + ] = 1');
|
||||
validate(ast);
|
||||
|
||||
const errors = ast.errors.map(e => e.message);
|
||||
expect(errors.length).toBe(2);
|
||||
expect(errors[0]).toContain('Unexpected token =');
|
||||
expect(errors[1]).toContain('Missing expected ]');
|
||||
});
|
||||
|
||||
it('should error on writes after a keyed write', () => {
|
||||
const ast = parseAction('a[1] = 1 = 2');
|
||||
expect(unparse(ast)).toEqual('a[1] = 1');
|
||||
validate(ast);
|
||||
|
||||
expect(ast.errors.length).toBe(1);
|
||||
expect(ast.errors[0].message).toContain('Unexpected token \'=\'');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('conditional', () => {
|
||||
it('should parse ternary/conditional expressions', () => {
|
||||
checkAction('7 == 3 + 4 ? 10 : 20');
|
||||
|
@ -926,3 +1004,12 @@ function expectActionError(text: string, message: string) {
|
|||
function expectBindingError(text: string, message: string) {
|
||||
expectError(validate(parseBinding(text)), message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that an malformed action parses to a recovered AST while emitting an
|
||||
* error.
|
||||
*/
|
||||
function checkActionWithError(text: string, expected: string, error: string) {
|
||||
checkAction(text, expected);
|
||||
expectActionError(text, error);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue