refactor(lexer): rename to scanner, use ints, etc.
This commit is contained in:
parent
3482fb1291
commit
d7d52aaef2
|
@ -1,5 +1,5 @@
|
||||||
import {List, ListWrapper, SetWrapper} from "facade/collection";
|
import {List, ListWrapper, SetWrapper} from "facade/collection";
|
||||||
import {FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
|
import {int, FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
|
||||||
|
|
||||||
// TODO(chirayu): Rewrite as consts when possible.
|
// TODO(chirayu): Rewrite as consts when possible.
|
||||||
export var TOKEN_TYPE_CHARACTER = 1;
|
export var TOKEN_TYPE_CHARACTER = 1;
|
||||||
|
@ -12,21 +12,21 @@ export var TOKEN_TYPE_NUMBER = 6;
|
||||||
export class Token {
|
export class Token {
|
||||||
@FIELD('final index:int')
|
@FIELD('final index:int')
|
||||||
@FIELD('final type:int')
|
@FIELD('final type:int')
|
||||||
@FIELD('final _intValue:int')
|
@FIELD('final _numValue:int')
|
||||||
@FIELD('final _strValue:int')
|
@FIELD('final _strValue:int')
|
||||||
constructor(index:number/*int*/, type:number/*int*/, intValue:number/*int*/, strValue:string) {
|
constructor(index:int, type:int, numValue:number, strValue:string) {
|
||||||
/**
|
/**
|
||||||
* NOTE: To ensure that this constructor creates the same hidden class each time, ensure that
|
* NOTE: To ensure that this constructor creates the same hidden class each time, ensure that
|
||||||
* all the fields are assigned to in the exact same order in each run of this constructor.
|
* all the fields are assigned to in the exact same order in each run of this constructor.
|
||||||
*/
|
*/
|
||||||
this.index = index;
|
this.index = index;
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this._intValue = intValue;
|
this._numValue = numValue;
|
||||||
this._strValue = strValue;
|
this._strValue = strValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
isCharacter(code:number/*int*/):boolean {
|
isCharacter(code:int):boolean {
|
||||||
return (this.type == TOKEN_TYPE_CHARACTER && this._intValue == code);
|
return (this.type == TOKEN_TYPE_CHARACTER && this._numValue == code);
|
||||||
}
|
}
|
||||||
|
|
||||||
isNumber():boolean {
|
isNumber():boolean {
|
||||||
|
@ -65,44 +65,44 @@ export class Token {
|
||||||
return (this.type == TOKEN_TYPE_KEYWORD && this._strValue == "false");
|
return (this.type == TOKEN_TYPE_KEYWORD && this._strValue == "false");
|
||||||
}
|
}
|
||||||
|
|
||||||
toNumber():number/*int*/ {
|
toNumber():number {
|
||||||
// -1 instead of NULL ok?
|
// -1 instead of NULL ok?
|
||||||
return (this.type == TOKEN_TYPE_NUMBER) ? this._intValue : -1;
|
return (this.type == TOKEN_TYPE_NUMBER) ? this._numValue : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
toString():string {
|
toString():string {
|
||||||
var type:number/*int*/ = this.type;
|
var type:int = this.type;
|
||||||
if (type >= TOKEN_TYPE_CHARACTER && type <= TOKEN_TYPE_STRING) {
|
if (type >= TOKEN_TYPE_CHARACTER && type <= TOKEN_TYPE_STRING) {
|
||||||
return this._strValue;
|
return this._strValue;
|
||||||
} else if (type == TOKEN_TYPE_NUMBER) {
|
} else if (type == TOKEN_TYPE_NUMBER) {
|
||||||
return this._intValue.toString();
|
return this._numValue.toString();
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function newCharacterToken(index:number/*int*/, code:number/*int*/):Token {
|
function newCharacterToken(index:int, code:int):Token {
|
||||||
return new Token(index, TOKEN_TYPE_CHARACTER, code, StringWrapper.fromCharCode(code));
|
return new Token(index, TOKEN_TYPE_CHARACTER, code, StringWrapper.fromCharCode(code));
|
||||||
}
|
}
|
||||||
|
|
||||||
function newIdentifierToken(index:number/*int*/, text:string):Token {
|
function newIdentifierToken(index:int, text:string):Token {
|
||||||
return new Token(index, TOKEN_TYPE_IDENTIFIER, 0, text);
|
return new Token(index, TOKEN_TYPE_IDENTIFIER, 0, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
function newKeywordToken(index:number/*int*/, text:string):Token {
|
function newKeywordToken(index:int, text:string):Token {
|
||||||
return new Token(index, TOKEN_TYPE_KEYWORD, 0, text);
|
return new Token(index, TOKEN_TYPE_KEYWORD, 0, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
function newOperatorToken(index:number/*int*/, text:string):Token {
|
function newOperatorToken(index:int, text:string):Token {
|
||||||
return new Token(index, TOKEN_TYPE_OPERATOR, 0, text);
|
return new Token(index, TOKEN_TYPE_OPERATOR, 0, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
function newStringToken(index:number/*int*/, text:string):Token {
|
function newStringToken(index:int, text:string):Token {
|
||||||
return new Token(index, TOKEN_TYPE_STRING, 0, text);
|
return new Token(index, TOKEN_TYPE_STRING, 0, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
function newNumberToken(index:number/*int*/, n:number/*int*/):Token {
|
function newNumberToken(index:int, n:number):Token {
|
||||||
return new Token(index, TOKEN_TYPE_NUMBER, n, "");
|
return new Token(index, TOKEN_TYPE_NUMBER, n, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,7 +211,7 @@ export class Scanner {
|
||||||
if (isIdentifierStart(peek)) return this.scanIdentifier();
|
if (isIdentifierStart(peek)) return this.scanIdentifier();
|
||||||
if (isDigit(peek)) return this.scanNumber(index);
|
if (isDigit(peek)) return this.scanNumber(index);
|
||||||
|
|
||||||
var start:number/*int*/ = index;
|
var start:int = index;
|
||||||
switch (peek) {
|
switch (peek) {
|
||||||
case $PERIOD:
|
case $PERIOD:
|
||||||
this.advance();
|
this.advance();
|
||||||
|
@ -255,21 +255,21 @@ export class Scanner {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
scanCharacter(start:number/*int*/, code:number/*int*/):Token {
|
scanCharacter(start:int, code:int):Token {
|
||||||
assert(this.peek == code);
|
assert(this.peek == code);
|
||||||
this.advance();
|
this.advance();
|
||||||
return newCharacterToken(start, code);
|
return newCharacterToken(start, code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
scanOperator(start:number/*int*/, str:string):Token {
|
scanOperator(start:int, str:string):Token {
|
||||||
assert(this.peek == StringWrapper.charCodeAt(str, 0));
|
assert(this.peek == StringWrapper.charCodeAt(str, 0));
|
||||||
assert(SetWrapper.has(OPERATORS, str));
|
assert(SetWrapper.has(OPERATORS, str));
|
||||||
this.advance();
|
this.advance();
|
||||||
return newOperatorToken(start, str);
|
return newOperatorToken(start, str);
|
||||||
}
|
}
|
||||||
|
|
||||||
scanComplexOperator(start:number/*int*/, code:number/*int*/, one:string, two:string):Token {
|
scanComplexOperator(start:int, code:int, one:string, two:string):Token {
|
||||||
assert(this.peek == StringWrapper.charCodeAt(one, 0));
|
assert(this.peek == StringWrapper.charCodeAt(one, 0));
|
||||||
this.advance();
|
this.advance();
|
||||||
var str:string = one;
|
var str:string = one;
|
||||||
|
@ -283,7 +283,7 @@ export class Scanner {
|
||||||
|
|
||||||
scanIdentifier():Token {
|
scanIdentifier():Token {
|
||||||
assert(isIdentifierStart(this.peek));
|
assert(isIdentifierStart(this.peek));
|
||||||
var start:number/*int*/ = this.index;
|
var start:int = this.index;
|
||||||
this.advance();
|
this.advance();
|
||||||
while (isIdentifierPart(this.peek)) this.advance();
|
while (isIdentifierPart(this.peek)) this.advance();
|
||||||
var str:string = this.input.substring(start, this.index);
|
var str:string = this.input.substring(start, this.index);
|
||||||
|
@ -294,7 +294,7 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
scanNumber(start:number/*int*/):Token {
|
scanNumber(start:int):Token {
|
||||||
assert(isDigit(this.peek));
|
assert(isDigit(this.peek));
|
||||||
var simple:boolean = (this.index === start);
|
var simple:boolean = (this.index === start);
|
||||||
this.advance(); // Skip initial digit.
|
this.advance(); // Skip initial digit.
|
||||||
|
@ -315,18 +315,18 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
var str:string = this.input.substring(start, this.index);
|
var str:string = this.input.substring(start, this.index);
|
||||||
// TODO
|
// TODO
|
||||||
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseDouble(str);
|
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseFloat(str);
|
||||||
return newNumberToken(start, value);
|
return newNumberToken(start, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
scanString():Token {
|
scanString():Token {
|
||||||
assert(this.peek == $SQ || this.peek == $DQ);
|
assert(this.peek == $SQ || this.peek == $DQ);
|
||||||
var start:number/*int*/ = this.index;
|
var start:int = this.index;
|
||||||
var quote:number/*int*/ = this.peek;
|
var quote:int = this.peek;
|
||||||
this.advance(); // Skip initial quote.
|
this.advance(); // Skip initial quote.
|
||||||
|
|
||||||
var buffer:StringJoiner; //ckck
|
var buffer:StringJoiner; //ckck
|
||||||
var marker:number/*int*/ = this.index;
|
var marker:int = this.index;
|
||||||
var input:string = this.input;
|
var input:string = this.input;
|
||||||
|
|
||||||
while (this.peek != quote) {
|
while (this.peek != quote) {
|
||||||
|
@ -334,12 +334,12 @@ export class Scanner {
|
||||||
if (buffer == null) buffer = new StringJoiner();
|
if (buffer == null) buffer = new StringJoiner();
|
||||||
buffer.add(input.substring(marker, this.index));
|
buffer.add(input.substring(marker, this.index));
|
||||||
this.advance();
|
this.advance();
|
||||||
var unescapedCode:number/*int*/;
|
var unescapedCode:int;
|
||||||
if (this.peek == $u) {
|
if (this.peek == $u) {
|
||||||
// 4 character hex code for unicode character.
|
// 4 character hex code for unicode character.
|
||||||
var hex:string = input.substring(this.index + 1, this.index + 5);
|
var hex:string = input.substring(this.index + 1, this.index + 5);
|
||||||
unescapedCode = NumberWrapper.parseInt(hex, 16);
|
unescapedCode = NumberWrapper.parseInt(hex, 16);
|
||||||
for (var i:number/*int*/ = 0; i < 5; i++) {
|
for (var i:int = 0; i < 5; i++) {
|
||||||
this.advance();
|
this.advance();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -369,23 +369,23 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
error(message:string) {
|
error(message:string) {
|
||||||
var position:number/*int*/ = this.index + this.offset;
|
var position:int = this.index + this.offset;
|
||||||
throw `Lexer Error: ${message} at column ${position} in expression [${input}]`;
|
throw `Lexer Error: ${message} at column ${position} in expression [${input}]`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function isWhitespace(code:number/*int*/):boolean {
|
function isWhitespace(code:int):boolean {
|
||||||
return (code >= $TAB && code <= $SPACE) || (code == $NBSP);
|
return (code >= $TAB && code <= $SPACE) || (code == $NBSP);
|
||||||
}
|
}
|
||||||
|
|
||||||
function isIdentifierStart(code:number/*int*/):boolean {
|
function isIdentifierStart(code:int):boolean {
|
||||||
return ($a <= code && code <= $z) ||
|
return ($a <= code && code <= $z) ||
|
||||||
($A <= code && code <= $Z) ||
|
($A <= code && code <= $Z) ||
|
||||||
(code == $_) ||
|
(code == $_) ||
|
||||||
(code == $$);
|
(code == $$);
|
||||||
}
|
}
|
||||||
|
|
||||||
function isIdentifierPart(code:number/*int*/):boolean {
|
function isIdentifierPart(code:int):boolean {
|
||||||
return ($a <= code && code <= $z) ||
|
return ($a <= code && code <= $z) ||
|
||||||
($A <= code && code <= $Z) ||
|
($A <= code && code <= $Z) ||
|
||||||
($0 <= code && code <= $9) ||
|
($0 <= code && code <= $9) ||
|
||||||
|
@ -393,19 +393,19 @@ function isIdentifierPart(code:number/*int*/):boolean {
|
||||||
(code == $$);
|
(code == $$);
|
||||||
}
|
}
|
||||||
|
|
||||||
function isDigit(code:number/*int*/):boolean {
|
function isDigit(code:int):boolean {
|
||||||
return $0 <= code && code <= $9;
|
return $0 <= code && code <= $9;
|
||||||
}
|
}
|
||||||
|
|
||||||
function isExponentStart(code:number/*int*/):boolean {
|
function isExponentStart(code:int):boolean {
|
||||||
return code == $e || code == $E;
|
return code == $e || code == $E;
|
||||||
}
|
}
|
||||||
|
|
||||||
function isExponentSign(code:number/*int*/):boolean {
|
function isExponentSign(code:int):boolean {
|
||||||
return code == $MINUS || code == $PLUS;
|
return code == $MINUS || code == $PLUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
function unescape(code:number/*int*/):number/*int*/ {
|
function unescape(code:int):int {
|
||||||
switch(code) {
|
switch(code) {
|
||||||
case $n: return $LF;
|
case $n: return $LF;
|
||||||
case $f: return $FF;
|
case $f: return $FF;
|
||||||
|
@ -445,16 +445,4 @@ var KEYWORDS = SetWrapper.createFromList([
|
||||||
'undefined',
|
'undefined',
|
||||||
'true',
|
'true',
|
||||||
'false',
|
'false',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|
||||||
export function Lexer(text:string):List {
|
|
||||||
var scanner:Scanner = new Scanner(text);
|
|
||||||
var tokens:List<Token> = [];
|
|
||||||
var token:Token = scanner.scanToken();
|
|
||||||
while (token != null) {
|
|
||||||
ListWrapper.push(tokens, token);
|
|
||||||
token = scanner.scanToken();
|
|
||||||
}
|
|
||||||
return tokens;
|
|
||||||
}
|
|
|
@ -1,9 +1,20 @@
|
||||||
import {describe, it, expect} from 'test_lib/test_lib';
|
import {describe, it, expect} from 'test_lib/test_lib';
|
||||||
import {Lexer, Scanner, Token} from 'change_detection/parser/lexer';
|
import {Scanner, Token} from 'change_detection/parser/scanner';
|
||||||
import {DOM} from 'facade/dom';
|
import {DOM} from 'facade/dom';
|
||||||
import {List, ListWrapper} from "facade/collection";
|
import {List, ListWrapper} from "facade/collection";
|
||||||
import {StringWrapper} from "facade/lang";
|
import {StringWrapper} from "facade/lang";
|
||||||
|
|
||||||
|
function lex(text:string):List {
|
||||||
|
var scanner:Scanner = new Scanner(text);
|
||||||
|
var tokens:List<Token> = [];
|
||||||
|
var token:Token = scanner.scanToken();
|
||||||
|
while (token != null) {
|
||||||
|
ListWrapper.push(tokens, token);
|
||||||
|
token = scanner.scanToken();
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
function expectToken(token, index) {
|
function expectToken(token, index) {
|
||||||
expect(token instanceof Token).toBe(true);
|
expect(token instanceof Token).toBe(true);
|
||||||
expect(token.index).toEqual(index);
|
expect(token.index).toEqual(index);
|
||||||
|
@ -46,16 +57,16 @@ function expectKeywordToken(token, index, keyword) {
|
||||||
|
|
||||||
|
|
||||||
export function main() {
|
export function main() {
|
||||||
describe('lexer', function() {
|
describe('scanner', function() {
|
||||||
describe('token', function() {
|
describe('token', function() {
|
||||||
it('should tokenize a simple identifier', function() {
|
it('should tokenize a simple identifier', function() {
|
||||||
var tokens:List<int> = Lexer("j");
|
var tokens:List<int> = lex("j");
|
||||||
expect(tokens.length).toEqual(1);
|
expect(tokens.length).toEqual(1);
|
||||||
expectIdentifierToken(tokens[0], 0, 'j');
|
expectIdentifierToken(tokens[0], 0, 'j');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize a dotted identifier', function() {
|
it('should tokenize a dotted identifier', function() {
|
||||||
var tokens:List<int> = Lexer("j.k");
|
var tokens:List<int> = lex("j.k");
|
||||||
expect(tokens.length).toEqual(3);
|
expect(tokens.length).toEqual(3);
|
||||||
expectIdentifierToken(tokens[0], 0, 'j');
|
expectIdentifierToken(tokens[0], 0, 'j');
|
||||||
expectCharacterToken (tokens[1], 1, '.');
|
expectCharacterToken (tokens[1], 1, '.');
|
||||||
|
@ -63,34 +74,34 @@ export function main() {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize an operator', function() {
|
it('should tokenize an operator', function() {
|
||||||
var tokens:List<int> = Lexer("j-k");
|
var tokens:List<int> = lex("j-k");
|
||||||
expect(tokens.length).toEqual(3);
|
expect(tokens.length).toEqual(3);
|
||||||
expectOperatorToken(tokens[1], 1, '-');
|
expectOperatorToken(tokens[1], 1, '-');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize an indexed operator', function() {
|
it('should tokenize an indexed operator', function() {
|
||||||
var tokens:List<int> = Lexer("j[k]");
|
var tokens:List<int> = lex("j[k]");
|
||||||
expect(tokens.length).toEqual(4);
|
expect(tokens.length).toEqual(4);
|
||||||
expectCharacterToken(tokens[1], 1, "[");
|
expectCharacterToken(tokens[1], 1, "[");
|
||||||
expectCharacterToken(tokens[3], 3, "]");
|
expectCharacterToken(tokens[3], 3, "]");
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize numbers', function() {
|
it('should tokenize numbers', function() {
|
||||||
var tokens:List<int> = Lexer("88");
|
var tokens:List<int> = lex("88");
|
||||||
expect(tokens.length).toEqual(1);
|
expect(tokens.length).toEqual(1);
|
||||||
expectNumberToken(tokens[0], 0, 88);
|
expectNumberToken(tokens[0], 0, 88);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize numbers within index ops', function() {
|
it('should tokenize numbers within index ops', function() {
|
||||||
expectNumberToken(Lexer("a[22]")[2], 2, 22);
|
expectNumberToken(lex("a[22]")[2], 2, 22);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize simple quoted strings', function() {
|
it('should tokenize simple quoted strings', function() {
|
||||||
expectStringToken(Lexer('"a"')[0], 0, "a");
|
expectStringToken(lex('"a"')[0], 0, "a");
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should tokenize quoted strings with escaped quotes', function() {
|
it('should tokenize quoted strings with escaped quotes', function() {
|
||||||
expectStringToken(Lexer('"a\\""')[0], 0, 'a"');
|
expectStringToken(lex('"a\\""')[0], 0, 'a"');
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue