refactor(lexer): rename to scanner, use ints, etc.

This commit is contained in:
Chirayu Krishnappa 2014-10-01 16:58:52 -07:00
parent 3482fb1291
commit d7d52aaef2
2 changed files with 58 additions and 59 deletions

View File

@ -1,5 +1,5 @@
import {List, ListWrapper, SetWrapper} from "facade/collection";
import {FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
import {int, FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
// TODO(chirayu): Rewrite as consts when possible.
export var TOKEN_TYPE_CHARACTER = 1;
@ -12,21 +12,21 @@ export var TOKEN_TYPE_NUMBER = 6;
export class Token {
/**
 * Builds a token from its position, kind and payload.
 *
 * The field annotations below mirror the constructor parameter types: `_numValue`
 * receives the `number`-typed `numValue` argument and `_strValue` receives the
 * `string`-typed `strValue` argument.
 *
 * @param index    stored in `index`; the factories in this file pass the token's start position.
 * @param type     one of the TOKEN_TYPE_* constants.
 * @param numValue numeric payload: the character code for character tokens, the parsed
 *                 value for number tokens, 0 for the other token factories.
 * @param strValue string payload; "" for number tokens.
 */
@FIELD('final index:int')
@FIELD('final type:int')
@FIELD('final _numValue:number')
@FIELD('final _strValue:string')
constructor(index:int, type:int, numValue:number, strValue:string) {
  /**
   * NOTE: To ensure that this constructor creates the same hidden class each time, ensure that
   * all the fields are assigned to in the exact same order in each run of this constructor.
   */
  this.index = index;
  this.type = type;
  this._numValue = numValue;
  this._strValue = strValue;
}
/**
 * Reports whether this token is a character token whose numeric payload equals `code`.
 */
isCharacter(code:int):boolean {
  if (this.type != TOKEN_TYPE_CHARACTER) return false;
  return this._numValue == code;
}
isNumber():boolean {
@ -65,44 +65,44 @@ export class Token {
return (this.type == TOKEN_TYPE_KEYWORD && this._strValue == "false");
}
/**
 * Returns the numeric payload of a number token; any other token type yields -1.
 */
toNumber():number {
  // -1 is the "not a number token" sentinel; the original author left open
  // whether null would be the better choice here.
  if (this.type == TOKEN_TYPE_NUMBER) {
    return this._numValue;
  }
  return -1;
}
/**
 * Text form of the token: the string payload for types in the
 * TOKEN_TYPE_CHARACTER..TOKEN_TYPE_STRING range, the stringified numeric payload
 * for number tokens, and null for anything else.
 */
toString():string {
  var kind:int = this.type;
  // The range check runs first, exactly as before, so it wins over the number check.
  var carriesText:boolean = kind >= TOKEN_TYPE_CHARACTER && kind <= TOKEN_TYPE_STRING;
  if (carriesText) return this._strValue;
  if (kind == TOKEN_TYPE_NUMBER) return this._numValue.toString();
  return null;
}
}
/**
 * Creates a character token at `index`: `code` becomes the numeric payload and
 * the string built from `code` becomes the string payload.
 */
function newCharacterToken(index:int, code:int):Token {
  var text:string = StringWrapper.fromCharCode(code);
  return new Token(index, TOKEN_TYPE_CHARACTER, code, text);
}
/**
 * Creates an identifier token at `index` carrying `text`; the numeric payload is 0.
 */
function newIdentifierToken(index:int, text:string):Token {
  var noNumber:number = 0;
  return new Token(index, TOKEN_TYPE_IDENTIFIER, noNumber, text);
}
/**
 * Creates a keyword token at `index` carrying `text`; the numeric payload is 0.
 */
function newKeywordToken(index:int, text:string):Token {
  var noNumber:number = 0;
  return new Token(index, TOKEN_TYPE_KEYWORD, noNumber, text);
}
/**
 * Creates an operator token at `index` carrying `text`; the numeric payload is 0.
 */
function newOperatorToken(index:int, text:string):Token {
  var noNumber:number = 0;
  return new Token(index, TOKEN_TYPE_OPERATOR, noNumber, text);
}
/**
 * Creates a string token at `index` carrying `text`; the numeric payload is 0.
 */
function newStringToken(index:int, text:string):Token {
  var noNumber:number = 0;
  return new Token(index, TOKEN_TYPE_STRING, noNumber, text);
}
/**
 * Creates a number token at `index` with `n` as the numeric payload and an empty
 * string payload.
 */
function newNumberToken(index:int, n:number):Token {
  var noText:string = "";
  return new Token(index, TOKEN_TYPE_NUMBER, n, noText);
}
@ -211,7 +211,7 @@ export class Scanner {
if (isIdentifierStart(peek)) return this.scanIdentifier();
if (isDigit(peek)) return this.scanNumber(index);
var start:number/*int*/ = index;
var start:int = index;
switch (peek) {
case $PERIOD:
this.advance();
@ -255,21 +255,21 @@ export class Scanner {
return null;
}
/**
 * Asserts that the current character equals `code`, calls advance() once, and
 * returns a character token for `code` positioned at `start`.
 */
scanCharacter(start:int, code:int):Token {
  var current:int = this.peek;
  assert(current == code);
  this.advance();
  var token:Token = newCharacterToken(start, code);
  return token;
}
/**
 * Asserts that the current character is the first character of `str` and that
 * `str` is a member of OPERATORS, calls advance() once, and returns an operator
 * token for `str` positioned at `start`.
 */
scanOperator(start:int, str:string):Token {
  var firstCode:int = StringWrapper.charCodeAt(str, 0);
  assert(this.peek == firstCode);
  var isKnownOperator:boolean = SetWrapper.has(OPERATORS, str);
  assert(isKnownOperator);
  this.advance();
  var token:Token = newOperatorToken(start, str);
  return token;
}
scanComplexOperator(start:number/*int*/, code:number/*int*/, one:string, two:string):Token {
scanComplexOperator(start:int, code:int, one:string, two:string):Token {
assert(this.peek == StringWrapper.charCodeAt(one, 0));
this.advance();
var str:string = one;
@ -283,7 +283,7 @@ export class Scanner {
scanIdentifier():Token {
assert(isIdentifierStart(this.peek));
var start:number/*int*/ = this.index;
var start:int = this.index;
this.advance();
while (isIdentifierPart(this.peek)) this.advance();
var str:string = this.input.substring(start, this.index);
@ -294,7 +294,7 @@ export class Scanner {
}
}
scanNumber(start:number/*int*/):Token {
scanNumber(start:int):Token {
assert(isDigit(this.peek));
var simple:boolean = (this.index === start);
this.advance(); // Skip initial digit.
@ -315,18 +315,18 @@ export class Scanner {
}
var str:string = this.input.substring(start, this.index);
// TODO
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseDouble(str);
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseFloat(str);
return newNumberToken(start, value);
}
scanString():Token {
assert(this.peek == $SQ || this.peek == $DQ);
var start:number/*int*/ = this.index;
var quote:number/*int*/ = this.peek;
var start:int = this.index;
var quote:int = this.peek;
this.advance(); // Skip initial quote.
var buffer:StringJoiner; //ckck
var marker:number/*int*/ = this.index;
var marker:int = this.index;
var input:string = this.input;
while (this.peek != quote) {
@ -334,12 +334,12 @@ export class Scanner {
if (buffer == null) buffer = new StringJoiner();
buffer.add(input.substring(marker, this.index));
this.advance();
var unescapedCode:number/*int*/;
var unescapedCode:int;
if (this.peek == $u) {
// 4 character hex code for unicode character.
var hex:string = input.substring(this.index + 1, this.index + 5);
unescapedCode = NumberWrapper.parseInt(hex, 16);
for (var i:number/*int*/ = 0; i < 5; i++) {
for (var i:int = 0; i < 5; i++) {
this.advance();
}
} else {
@ -369,23 +369,23 @@ export class Scanner {
}
/**
 * Aborts scanning by throwing a formatted message string (a plain string, not an
 * Error object, so existing catch sites keep seeing the same kind of value).
 *
 * The reported column is `this.index + this.offset`.
 *
 * @param message description of the problem, embedded in the thrown text.
 */
error(message:string) {
  var position:int = this.index + this.offset;
  // The expression text lives on the instance; a bare `input` is not bound in this
  // method and would fail with a ReferenceError instead of reporting the scan error.
  throw `Lexer Error: ${message} at column ${position} in expression [${this.input}]`;
}
}
/**
 * True when `code` lies in the $TAB..$SPACE range (inclusive) or equals $NBSP.
 */
function isWhitespace(code:int):boolean {
  if (code == $NBSP) return true;
  return $TAB <= code && code <= $SPACE;
}
/**
 * True when `code` may begin an identifier: within $a..$z, within $A..$Z,
 * or equal to $_ or $$.
 */
function isIdentifierStart(code:int):boolean {
  if ($a <= code && code <= $z) return true;
  if ($A <= code && code <= $Z) return true;
  return code == $_ || code == $$;
}
function isIdentifierPart(code:number/*int*/):boolean {
function isIdentifierPart(code:int):boolean {
return ($a <= code && code <= $z) ||
($A <= code && code <= $Z) ||
($0 <= code && code <= $9) ||
@ -393,19 +393,19 @@ function isIdentifierPart(code:number/*int*/):boolean {
(code == $$);
}
/**
 * True when `code` lies in the $0..$9 range (inclusive).
 */
function isDigit(code:int):boolean {
  var atLeastZero:boolean = $0 <= code;
  var atMostNine:boolean = code <= $9;
  return atLeastZero && atMostNine;
}
/**
 * True when `code` equals $e or $E.
 */
function isExponentStart(code:int):boolean {
  if (code == $e) return true;
  return code == $E;
}
/**
 * True when `code` equals $MINUS or $PLUS.
 */
function isExponentSign(code:int):boolean {
  if (code == $MINUS) return true;
  return code == $PLUS;
}
function unescape(code:number/*int*/):number/*int*/ {
function unescape(code:int):int {
switch(code) {
case $n: return $LF;
case $f: return $FF;
@ -445,16 +445,4 @@ var KEYWORDS = SetWrapper.createFromList([
'undefined',
'true',
'false',
]);
export function Lexer(text:string):List {
var scanner:Scanner = new Scanner(text);
var tokens:List<Token> = [];
var token:Token = scanner.scanToken();
while (token != null) {
ListWrapper.push(tokens, token);
token = scanner.scanToken();
}
return tokens;
}
]);

View File

@ -1,9 +1,20 @@
import {describe, it, expect} from 'test_lib/test_lib';
import {Lexer, Scanner, Token} from 'change_detection/parser/lexer';
import {Scanner, Token} from 'change_detection/parser/scanner';
import {DOM} from 'facade/dom';
import {List, ListWrapper} from "facade/collection";
import {StringWrapper} from "facade/lang";
/**
 * Test helper: runs a Scanner over `text` and collects every token it produces,
 * stopping at the first null returned by scanToken().
 */
function lex(text:string):List {
  var source:Scanner = new Scanner(text);
  var collected:List<Token> = [];
  for (var next:Token = source.scanToken(); next != null; next = source.scanToken()) {
    ListWrapper.push(collected, next);
  }
  return collected;
}
function expectToken(token, index) {
expect(token instanceof Token).toBe(true);
expect(token.index).toEqual(index);
@ -46,16 +57,16 @@ function expectKeywordToken(token, index, keyword) {
export function main() {
describe('lexer', function() {
describe('scanner', function() {
describe('token', function() {
// A lone identifier scans to exactly one identifier token at index 0.
it('should tokenize a simple identifier', function() {
  // lex() collects Token instances, so the list is typed List<Token>, not List<int>.
  var tokens:List<Token> = lex("j");
  expect(tokens.length).toEqual(1);
  expectIdentifierToken(tokens[0], 0, 'j');
});
it('should tokenize a dotted identifier', function() {
var tokens:List<int> = Lexer("j.k");
var tokens:List<int> = lex("j.k");
expect(tokens.length).toEqual(3);
expectIdentifierToken(tokens[0], 0, 'j');
expectCharacterToken (tokens[1], 1, '.');
@ -63,34 +74,34 @@ export function main() {
});
// "j-k" scans to three tokens; the middle one is the '-' operator at index 1.
it('should tokenize an operator', function() {
  // lex() collects Token instances, so the list is typed List<Token>, not List<int>.
  var tokens:List<Token> = lex("j-k");
  expect(tokens.length).toEqual(3);
  expectOperatorToken(tokens[1], 1, '-');
});
// "j[k]" scans to four tokens; the brackets are character tokens at indices 1 and 3.
it('should tokenize an indexed operator', function() {
  // lex() collects Token instances, so the list is typed List<Token>, not List<int>.
  var tokens:List<Token> = lex("j[k]");
  expect(tokens.length).toEqual(4);
  expectCharacterToken(tokens[1], 1, "[");
  expectCharacterToken(tokens[3], 3, "]");
});
// "88" scans to a single number token with value 88 at index 0.
it('should tokenize numbers', function() {
  // lex() collects Token instances, so the list is typed List<Token>, not List<int>.
  var tokens:List<Token> = lex("88");
  expect(tokens.length).toEqual(1);
  expectNumberToken(tokens[0], 0, 88);
});
// In "a[22]" the third token is the number 22, located at index 2.
it('should tokenize numbers within index ops', function() {
  var tokens:List<Token> = lex("a[22]");
  expectNumberToken(tokens[2], 2, 22);
});
// A double-quoted "a" scans to a string token at index 0 whose value is a.
it('should tokenize simple quoted strings', function() {
  var tokens:List<Token> = lex('"a"');
  expectStringToken(tokens[0], 0, "a");
});
// A backslash-escaped quote inside the literal ends up as a plain quote in the token value.
it('should tokenize quoted strings with escaped quotes', function() {
  var tokens:List<Token> = lex('"a\\""');
  expectStringToken(tokens[0], 0, 'a"');
});
});