refactor(lexer): rename to scanner, use ints, etc.
This commit is contained in:
parent
3482fb1291
commit
d7d52aaef2
|
@ -1,5 +1,5 @@
|
|||
import {List, ListWrapper, SetWrapper} from "facade/collection";
|
||||
import {FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
|
||||
import {int, FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
|
||||
|
||||
// TODO(chirayu): Rewrite as consts when possible.
|
||||
export var TOKEN_TYPE_CHARACTER = 1;
|
||||
|
@ -12,21 +12,21 @@ export var TOKEN_TYPE_NUMBER = 6;
|
|||
export class Token {
|
||||
@FIELD('final index:int')
|
||||
@FIELD('final type:int')
|
||||
@FIELD('final _intValue:int')
|
||||
@FIELD('final _numValue:int')
|
||||
@FIELD('final _strValue:int')
|
||||
// Creates a token from its index, its type tag, a numeric value and a string value,
// storing each argument on the instance.
// NOTE(review): the @FIELD declaration for _strValue says `int`, but the value assigned
// below comes from the `strValue:string` parameter — confirm the declared field type.
constructor(index:number/*int*/, type:number/*int*/, intValue:number/*int*/, strValue:string) {
|
||||
constructor(index:int, type:int, numValue:number, strValue:string) {
|
||||
/**
|
||||
* NOTE: To ensure that this constructor creates the same hidden class each time, ensure that
|
||||
* all the fields are assigned to in the exact same order in each run of this constructor.
|
||||
*/
|
||||
this.index = index;
|
||||
this.type = type;
|
||||
this._intValue = intValue;
|
||||
this._numValue = numValue;
|
||||
this._strValue = strValue;
|
||||
}
|
||||
|
||||
// Returns true only when this token's type is TOKEN_TYPE_CHARACTER and its stored
// numeric value equals `code`.
isCharacter(code:number/*int*/):boolean {
|
||||
return (this.type == TOKEN_TYPE_CHARACTER && this._intValue == code);
|
||||
isCharacter(code:int):boolean {
|
||||
return (this.type == TOKEN_TYPE_CHARACTER && this._numValue == code);
|
||||
}
|
||||
|
||||
isNumber():boolean {
|
||||
|
@ -65,44 +65,44 @@ export class Token {
|
|||
return (this.type == TOKEN_TYPE_KEYWORD && this._strValue == "false");
|
||||
}
|
||||
|
||||
// Returns the stored numeric value when this token's type is TOKEN_TYPE_NUMBER;
// every other token type yields the sentinel -1.
toNumber():number/*int*/ {
|
||||
toNumber():number {
|
||||
// -1 instead of NULL ok?
|
||||
return (this.type == TOKEN_TYPE_NUMBER) ? this._intValue : -1;
|
||||
return (this.type == TOKEN_TYPE_NUMBER) ? this._numValue : -1;
|
||||
}
|
||||
|
||||
// Returns the token's textual form:
//  - the stored string value when the type lies between TOKEN_TYPE_CHARACTER and
//    TOKEN_TYPE_STRING (both inclusive),
//  - the stored numeric value converted with toString() for TOKEN_TYPE_NUMBER,
//  - null for any other type.
toString():string {
|
||||
var type:number/*int*/ = this.type;
|
||||
var type:int = this.type;
|
||||
if (type >= TOKEN_TYPE_CHARACTER && type <= TOKEN_TYPE_STRING) {
|
||||
return this._strValue;
|
||||
} else if (type == TOKEN_TYPE_NUMBER) {
|
||||
return this._intValue.toString();
|
||||
return this._numValue.toString();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a TOKEN_TYPE_CHARACTER token at `index`. The character code is passed as the
// numeric value and StringWrapper.fromCharCode(code) as the string value.
function newCharacterToken(index:number/*int*/, code:number/*int*/):Token {
|
||||
function newCharacterToken(index:int, code:int):Token {
|
||||
return new Token(index, TOKEN_TYPE_CHARACTER, code, StringWrapper.fromCharCode(code));
|
||||
}
|
||||
|
||||
// Builds a TOKEN_TYPE_IDENTIFIER token at `index` with `text` as its string value;
// the numeric value is passed as 0.
function newIdentifierToken(index:number/*int*/, text:string):Token {
|
||||
function newIdentifierToken(index:int, text:string):Token {
|
||||
return new Token(index, TOKEN_TYPE_IDENTIFIER, 0, text);
|
||||
}
|
||||
|
||||
// Builds a TOKEN_TYPE_KEYWORD token at `index` with `text` as its string value;
// the numeric value is passed as 0.
function newKeywordToken(index:number/*int*/, text:string):Token {
|
||||
function newKeywordToken(index:int, text:string):Token {
|
||||
return new Token(index, TOKEN_TYPE_KEYWORD, 0, text);
|
||||
}
|
||||
|
||||
// Builds a TOKEN_TYPE_OPERATOR token at `index` with `text` as its string value;
// the numeric value is passed as 0.
function newOperatorToken(index:number/*int*/, text:string):Token {
|
||||
function newOperatorToken(index:int, text:string):Token {
|
||||
return new Token(index, TOKEN_TYPE_OPERATOR, 0, text);
|
||||
}
|
||||
|
||||
// Builds a TOKEN_TYPE_STRING token at `index` with `text` as its string value;
// the numeric value is passed as 0.
function newStringToken(index:number/*int*/, text:string):Token {
|
||||
function newStringToken(index:int, text:string):Token {
|
||||
return new Token(index, TOKEN_TYPE_STRING, 0, text);
|
||||
}
|
||||
|
||||
// Builds a TOKEN_TYPE_NUMBER token at `index` with `n` as its numeric value;
// the string value is passed as the empty string.
function newNumberToken(index:number/*int*/, n:number/*int*/):Token {
|
||||
function newNumberToken(index:int, n:number):Token {
|
||||
return new Token(index, TOKEN_TYPE_NUMBER, n, "");
|
||||
}
|
||||
|
||||
|
@ -211,7 +211,7 @@ export class Scanner {
|
|||
if (isIdentifierStart(peek)) return this.scanIdentifier();
|
||||
if (isDigit(peek)) return this.scanNumber(index);
|
||||
|
||||
var start:number/*int*/ = index;
|
||||
var start:int = index;
|
||||
switch (peek) {
|
||||
case $PERIOD:
|
||||
this.advance();
|
||||
|
@ -255,21 +255,21 @@ export class Scanner {
|
|||
return null;
|
||||
}
|
||||
|
||||
// Asserts that the current character (this.peek) equals `code`, advances once,
// and returns a character token for `code` positioned at `start`.
scanCharacter(start:number/*int*/, code:number/*int*/):Token {
|
||||
scanCharacter(start:int, code:int):Token {
|
||||
assert(this.peek == code);
|
||||
this.advance();
|
||||
return newCharacterToken(start, code);
|
||||
}
|
||||
|
||||
|
||||
// Asserts that the current character (this.peek) is the first character of `str` and
// that `str` is a member of OPERATORS, advances once, and returns an operator token
// for `str` positioned at `start`.
scanOperator(start:number/*int*/, str:string):Token {
|
||||
scanOperator(start:int, str:string):Token {
|
||||
assert(this.peek == StringWrapper.charCodeAt(str, 0));
|
||||
assert(SetWrapper.has(OPERATORS, str));
|
||||
this.advance();
|
||||
return newOperatorToken(start, str);
|
||||
}
|
||||
|
||||
scanComplexOperator(start:number/*int*/, code:number/*int*/, one:string, two:string):Token {
|
||||
scanComplexOperator(start:int, code:int, one:string, two:string):Token {
|
||||
assert(this.peek == StringWrapper.charCodeAt(one, 0));
|
||||
this.advance();
|
||||
var str:string = one;
|
||||
|
@ -283,7 +283,7 @@ export class Scanner {
|
|||
|
||||
scanIdentifier():Token {
|
||||
assert(isIdentifierStart(this.peek));
|
||||
var start:number/*int*/ = this.index;
|
||||
var start:int = this.index;
|
||||
this.advance();
|
||||
while (isIdentifierPart(this.peek)) this.advance();
|
||||
var str:string = this.input.substring(start, this.index);
|
||||
|
@ -294,7 +294,7 @@ export class Scanner {
|
|||
}
|
||||
}
|
||||
|
||||
scanNumber(start:number/*int*/):Token {
|
||||
scanNumber(start:int):Token {
|
||||
assert(isDigit(this.peek));
|
||||
var simple:boolean = (this.index === start);
|
||||
this.advance(); // Skip initial digit.
|
||||
|
@ -315,18 +315,18 @@ export class Scanner {
|
|||
}
|
||||
var str:string = this.input.substring(start, this.index);
|
||||
// TODO
|
||||
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseDouble(str);
|
||||
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseFloat(str);
|
||||
return newNumberToken(start, value);
|
||||
}
|
||||
|
||||
scanString():Token {
|
||||
assert(this.peek == $SQ || this.peek == $DQ);
|
||||
var start:number/*int*/ = this.index;
|
||||
var quote:number/*int*/ = this.peek;
|
||||
var start:int = this.index;
|
||||
var quote:int = this.peek;
|
||||
this.advance(); // Skip initial quote.
|
||||
|
||||
var buffer:StringJoiner; //ckck
|
||||
var marker:number/*int*/ = this.index;
|
||||
var marker:int = this.index;
|
||||
var input:string = this.input;
|
||||
|
||||
while (this.peek != quote) {
|
||||
|
@ -334,12 +334,12 @@ export class Scanner {
|
|||
if (buffer == null) buffer = new StringJoiner();
|
||||
buffer.add(input.substring(marker, this.index));
|
||||
this.advance();
|
||||
var unescapedCode:number/*int*/;
|
||||
var unescapedCode:int;
|
||||
if (this.peek == $u) {
|
||||
// 4 character hex code for unicode character.
|
||||
var hex:string = input.substring(this.index + 1, this.index + 5);
|
||||
unescapedCode = NumberWrapper.parseInt(hex, 16);
|
||||
for (var i:number/*int*/ = 0; i < 5; i++) {
|
||||
for (var i:int = 0; i < 5; i++) {
|
||||
this.advance();
|
||||
}
|
||||
} else {
|
||||
|
@ -369,23 +369,23 @@ export class Scanner {
|
|||
}
|
||||
|
||||
// Throws a lexer error message that includes `message`, the failing column
// (this.index + this.offset) and the expression text being scanned.
// The thrown value is a plain string, not an Error object.
error(message:string) {
|
||||
var position:number/*int*/ = this.index + this.offset;
|
||||
var position:int = this.index + this.offset;
|
||||
// The expression text is read from this.input; no `input` binding is declared in this method.
throw `Lexer Error: ${message} at column ${position} in expression [${this.input}]`;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when `code` lies in the range $TAB..$SPACE (both inclusive)
// or equals $NBSP.
function isWhitespace(code:number/*int*/):boolean {
|
||||
function isWhitespace(code:int):boolean {
|
||||
return (code >= $TAB && code <= $SPACE) || (code == $NBSP);
|
||||
}
|
||||
|
||||
// Returns true when `code` may begin an identifier: it lies in $a..$z or $A..$Z
// (inclusive), or equals $_ or $$.
function isIdentifierStart(code:number/*int*/):boolean {
|
||||
function isIdentifierStart(code:int):boolean {
|
||||
return ($a <= code && code <= $z) ||
|
||||
($A <= code && code <= $Z) ||
|
||||
(code == $_) ||
|
||||
(code == $$);
|
||||
}
|
||||
|
||||
function isIdentifierPart(code:number/*int*/):boolean {
|
||||
function isIdentifierPart(code:int):boolean {
|
||||
return ($a <= code && code <= $z) ||
|
||||
($A <= code && code <= $Z) ||
|
||||
($0 <= code && code <= $9) ||
|
||||
|
@ -393,19 +393,19 @@ function isIdentifierPart(code:number/*int*/):boolean {
|
|||
(code == $$);
|
||||
}
|
||||
|
||||
// Returns true when `code` lies in the range $0..$9 (both inclusive).
function isDigit(code:number/*int*/):boolean {
|
||||
function isDigit(code:int):boolean {
|
||||
return $0 <= code && code <= $9;
|
||||
}
|
||||
|
||||
// Returns true when `code` equals $e or $E.
function isExponentStart(code:number/*int*/):boolean {
|
||||
function isExponentStart(code:int):boolean {
|
||||
return code == $e || code == $E;
|
||||
}
|
||||
|
||||
// Returns true when `code` equals $MINUS or $PLUS.
function isExponentSign(code:number/*int*/):boolean {
|
||||
function isExponentSign(code:int):boolean {
|
||||
return code == $MINUS || code == $PLUS;
|
||||
}
|
||||
|
||||
function unescape(code:number/*int*/):number/*int*/ {
|
||||
function unescape(code:int):int {
|
||||
switch(code) {
|
||||
case $n: return $LF;
|
||||
case $f: return $FF;
|
||||
|
@ -446,15 +446,3 @@ var KEYWORDS = SetWrapper.createFromList([
|
|||
'true',
|
||||
'false',
|
||||
]);
|
||||
|
||||
|
||||
// Tokenizes `text`: creates a Scanner over it and collects every token returned by
// scanToken() into a list, stopping at the first null result. Returns that list.
export function Lexer(text:string):List {
|
||||
var scanner:Scanner = new Scanner(text);
|
||||
var tokens:List<Token> = [];
|
||||
var token:Token = scanner.scanToken();
|
||||
while (token != null) {
|
||||
ListWrapper.push(tokens, token);
|
||||
token = scanner.scanToken();
|
||||
}
|
||||
return tokens;
|
||||
}
|
|
@ -1,9 +1,20 @@
|
|||
import {describe, it, expect} from 'test_lib/test_lib';
|
||||
import {Lexer, Scanner, Token} from 'change_detection/parser/lexer';
|
||||
import {Scanner, Token} from 'change_detection/parser/scanner';
|
||||
import {DOM} from 'facade/dom';
|
||||
import {List, ListWrapper} from "facade/collection";
|
||||
import {StringWrapper} from "facade/lang";
|
||||
|
||||
// Test helper: creates a Scanner over `text` and collects every token returned by
// scanToken() into a list, stopping at the first null result. Returns that list.
function lex(text:string):List {
|
||||
var scanner:Scanner = new Scanner(text);
|
||||
var tokens:List<Token> = [];
|
||||
var token:Token = scanner.scanToken();
|
||||
while (token != null) {
|
||||
ListWrapper.push(tokens, token);
|
||||
token = scanner.scanToken();
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
function expectToken(token, index) {
|
||||
expect(token instanceof Token).toBe(true);
|
||||
expect(token.index).toEqual(index);
|
||||
|
@ -46,16 +57,16 @@ function expectKeywordToken(token, index, keyword) {
|
|||
|
||||
|
||||
export function main() {
|
||||
describe('lexer', function() {
|
||||
describe('scanner', function() {
|
||||
describe('token', function() {
|
||||
it('should tokenize a simple identifier', function() {
|
||||
var tokens:List<int> = Lexer("j");
|
||||
var tokens:List<int> = lex("j");
|
||||
expect(tokens.length).toEqual(1);
|
||||
expectIdentifierToken(tokens[0], 0, 'j');
|
||||
});
|
||||
|
||||
it('should tokenize a dotted identifier', function() {
|
||||
var tokens:List<int> = Lexer("j.k");
|
||||
var tokens:List<int> = lex("j.k");
|
||||
expect(tokens.length).toEqual(3);
|
||||
expectIdentifierToken(tokens[0], 0, 'j');
|
||||
expectCharacterToken (tokens[1], 1, '.');
|
||||
|
@ -63,34 +74,34 @@ export function main() {
|
|||
});
|
||||
|
||||
it('should tokenize an operator', function() {
|
||||
var tokens:List<int> = Lexer("j-k");
|
||||
var tokens:List<int> = lex("j-k");
|
||||
expect(tokens.length).toEqual(3);
|
||||
expectOperatorToken(tokens[1], 1, '-');
|
||||
});
|
||||
|
||||
it('should tokenize an indexed operator', function() {
|
||||
var tokens:List<int> = Lexer("j[k]");
|
||||
var tokens:List<int> = lex("j[k]");
|
||||
expect(tokens.length).toEqual(4);
|
||||
expectCharacterToken(tokens[1], 1, "[");
|
||||
expectCharacterToken(tokens[3], 3, "]");
|
||||
});
|
||||
|
||||
it('should tokenize numbers', function() {
|
||||
var tokens:List<int> = Lexer("88");
|
||||
var tokens:List<int> = lex("88");
|
||||
expect(tokens.length).toEqual(1);
|
||||
expectNumberToken(tokens[0], 0, 88);
|
||||
});
|
||||
|
||||
it('should tokenize numbers within index ops', function() {
|
||||
expectNumberToken(Lexer("a[22]")[2], 2, 22);
|
||||
expectNumberToken(lex("a[22]")[2], 2, 22);
|
||||
});
|
||||
|
||||
it('should tokenize simple quoted strings', function() {
|
||||
expectStringToken(Lexer('"a"')[0], 0, "a");
|
||||
expectStringToken(lex('"a"')[0], 0, "a");
|
||||
});
|
||||
|
||||
it('should tokenize quoted strings with escaped quotes', function() {
|
||||
expectStringToken(Lexer('"a\\""')[0], 0, 'a"');
|
||||
expectStringToken(lex('"a\\""')[0], 0, 'a"');
|
||||
});
|
||||
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue