refactor(lexer): rename to scanner, use ints, etc.

This commit is contained in:
Chirayu Krishnappa 2014-10-01 16:58:52 -07:00
parent 3482fb1291
commit d7d52aaef2
2 changed files with 58 additions and 59 deletions

View File

@ -1,5 +1,5 @@
import {List, ListWrapper, SetWrapper} from "facade/collection"; import {List, ListWrapper, SetWrapper} from "facade/collection";
import {FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang"; import {int, FIELD, NumberWrapper, StringJoiner, StringWrapper} from "facade/lang";
// TODO(chirayu): Rewrite as consts when possible. // TODO(chirayu): Rewrite as consts when possible.
export var TOKEN_TYPE_CHARACTER = 1; export var TOKEN_TYPE_CHARACTER = 1;
@ -12,21 +12,21 @@ export var TOKEN_TYPE_NUMBER = 6;
export class Token { export class Token {
// Token constructor: stores index, type, the numeric value and the string
// value, always assigning the four fields in that same order.
// Each row below shows the pre-commit text followed by the post-commit text;
// the commit renames intValue/_intValue to numValue/_numValue and replaces
// the `number/*int*/` annotations with `int`.
// NOTE(review): the `_strValue` field annotation declares `int` while the
// matching parameter is typed `string` — presumably it should read
// 'final _strValue:string'. Confirm.
// NOTE(review): the new `_numValue` annotation still says `int` although the
// parameter is now typed `number` — confirm which one is intended.
@FIELD('final index:int') @FIELD('final index:int')
@FIELD('final type:int') @FIELD('final type:int')
@FIELD('final _intValue:int') @FIELD('final _numValue:int')
@FIELD('final _strValue:int') @FIELD('final _strValue:int')
constructor(index:number/*int*/, type:number/*int*/, intValue:number/*int*/, strValue:string) { constructor(index:int, type:int, numValue:number, strValue:string) {
/** /**
* NOTE: To ensure that this constructor creates the same hidden class each time, ensure that * NOTE: To ensure that this constructor creates the same hidden class each time, ensure that
* all the fields are assigned to in the exact same order in each run of this constructor. * all the fields are assigned to in the exact same order in each run of this constructor.
*/ */
this.index = index; this.index = index;
this.type = type; this.type = type;
this._intValue = intValue; this._numValue = numValue;
this._strValue = strValue; this._strValue = strValue;
} }
// True only when this token has type TOKEN_TYPE_CHARACTER and its stored
// numeric value equals `code`.
isCharacter(code:number/*int*/):boolean { isCharacter(code:int):boolean {
return (this.type == TOKEN_TYPE_CHARACTER && this._intValue == code); return (this.type == TOKEN_TYPE_CHARACTER && this._numValue == code);
} }
isNumber():boolean { isNumber():boolean {
@ -65,44 +65,44 @@ export class Token {
return (this.type == TOKEN_TYPE_KEYWORD && this._strValue == "false"); return (this.type == TOKEN_TYPE_KEYWORD && this._strValue == "false");
} }
// Returns the stored numeric value when the token type is TOKEN_TYPE_NUMBER;
// every other token type yields the sentinel -1.
// NOTE(review): a caller cannot tell the sentinel apart from a stored value
// of -1, if such a value can ever be stored — confirm against the scanner.
toNumber():number/*int*/ { toNumber():number {
// -1 instead of NULL ok? // -1 instead of NULL ok?
return (this.type == TOKEN_TYPE_NUMBER) ? this._intValue : -1; return (this.type == TOKEN_TYPE_NUMBER) ? this._numValue : -1;
} }
// Text form of the token:
//  - types from TOKEN_TYPE_CHARACTER through TOKEN_TYPE_STRING (inclusive
//    range check) return the stored string value;
//  - TOKEN_TYPE_NUMBER returns the numeric value converted with toString();
//  - any other type returns null.
toString():string { toString():string {
var type:number/*int*/ = this.type; var type:int = this.type;
if (type >= TOKEN_TYPE_CHARACTER && type <= TOKEN_TYPE_STRING) { if (type >= TOKEN_TYPE_CHARACTER && type <= TOKEN_TYPE_STRING) {
return this._strValue; return this._strValue;
} else if (type == TOKEN_TYPE_NUMBER) { } else if (type == TOKEN_TYPE_NUMBER) {
return this._intValue.toString(); return this._numValue.toString();
} else { } else {
return null; return null;
} }
} }
} }
} }
// Builds a TOKEN_TYPE_CHARACTER token at `index`: the numeric value is the
// character code and the string value comes from
// StringWrapper.fromCharCode(code).
function newCharacterToken(index:number/*int*/, code:number/*int*/):Token { function newCharacterToken(index:int, code:int):Token {
return new Token(index, TOKEN_TYPE_CHARACTER, code, StringWrapper.fromCharCode(code)); return new Token(index, TOKEN_TYPE_CHARACTER, code, StringWrapper.fromCharCode(code));
} }
// Builds a TOKEN_TYPE_IDENTIFIER token at `index` carrying `text` as its
// string value; the numeric slot is filled with 0.
function newIdentifierToken(index:number/*int*/, text:string):Token { function newIdentifierToken(index:int, text:string):Token {
return new Token(index, TOKEN_TYPE_IDENTIFIER, 0, text); return new Token(index, TOKEN_TYPE_IDENTIFIER, 0, text);
} }
// Builds a TOKEN_TYPE_KEYWORD token at `index` carrying `text` as its string
// value; the numeric slot is filled with 0.
function newKeywordToken(index:number/*int*/, text:string):Token { function newKeywordToken(index:int, text:string):Token {
return new Token(index, TOKEN_TYPE_KEYWORD, 0, text); return new Token(index, TOKEN_TYPE_KEYWORD, 0, text);
} }
// Builds a TOKEN_TYPE_OPERATOR token at `index` carrying `text` as its string
// value; the numeric slot is filled with 0.
function newOperatorToken(index:number/*int*/, text:string):Token { function newOperatorToken(index:int, text:string):Token {
return new Token(index, TOKEN_TYPE_OPERATOR, 0, text); return new Token(index, TOKEN_TYPE_OPERATOR, 0, text);
} }
// Builds a TOKEN_TYPE_STRING token at `index` carrying `text` as its string
// value; the numeric slot is filled with 0.
function newStringToken(index:number/*int*/, text:string):Token { function newStringToken(index:int, text:string):Token {
return new Token(index, TOKEN_TYPE_STRING, 0, text); return new Token(index, TOKEN_TYPE_STRING, 0, text);
} }
// Builds a TOKEN_TYPE_NUMBER token at `index` holding `n` as its numeric
// value and the empty string as its string value. The post-commit signature
// types `n` as `number` instead of `number/*int*/`.
function newNumberToken(index:number/*int*/, n:number/*int*/):Token { function newNumberToken(index:int, n:number):Token {
return new Token(index, TOKEN_TYPE_NUMBER, n, ""); return new Token(index, TOKEN_TYPE_NUMBER, n, "");
} }
@ -211,7 +211,7 @@ export class Scanner {
if (isIdentifierStart(peek)) return this.scanIdentifier(); if (isIdentifierStart(peek)) return this.scanIdentifier();
if (isDigit(peek)) return this.scanNumber(index); if (isDigit(peek)) return this.scanNumber(index);
var start:number/*int*/ = index; var start:int = index;
switch (peek) { switch (peek) {
case $PERIOD: case $PERIOD:
this.advance(); this.advance();
@ -255,21 +255,21 @@ export class Scanner {
return null; return null;
} }
// Consumes a single character: asserts that the current peek equals `code`,
// calls advance(), and returns a character token positioned at `start`.
scanCharacter(start:number/*int*/, code:number/*int*/):Token { scanCharacter(start:int, code:int):Token {
assert(this.peek == code); assert(this.peek == code);
this.advance(); this.advance();
return newCharacterToken(start, code); return newCharacterToken(start, code);
} }
// Consumes an operator: asserts that the current peek equals the first char
// code of `str` and that `str` is a member of OPERATORS, calls advance()
// once, and returns an operator token positioned at `start`.
scanOperator(start:number/*int*/, str:string):Token { scanOperator(start:int, str:string):Token {
assert(this.peek == StringWrapper.charCodeAt(str, 0)); assert(this.peek == StringWrapper.charCodeAt(str, 0));
assert(SetWrapper.has(OPERATORS, str)); assert(SetWrapper.has(OPERATORS, str));
this.advance(); this.advance();
return newOperatorToken(start, str); return newOperatorToken(start, str);
} }
scanComplexOperator(start:number/*int*/, code:number/*int*/, one:string, two:string):Token { scanComplexOperator(start:int, code:int, one:string, two:string):Token {
assert(this.peek == StringWrapper.charCodeAt(one, 0)); assert(this.peek == StringWrapper.charCodeAt(one, 0));
this.advance(); this.advance();
var str:string = one; var str:string = one;
@ -283,7 +283,7 @@ export class Scanner {
scanIdentifier():Token { scanIdentifier():Token {
assert(isIdentifierStart(this.peek)); assert(isIdentifierStart(this.peek));
var start:number/*int*/ = this.index; var start:int = this.index;
this.advance(); this.advance();
while (isIdentifierPart(this.peek)) this.advance(); while (isIdentifierPart(this.peek)) this.advance();
var str:string = this.input.substring(start, this.index); var str:string = this.input.substring(start, this.index);
@ -294,7 +294,7 @@ export class Scanner {
} }
} }
scanNumber(start:number/*int*/):Token { scanNumber(start:int):Token {
assert(isDigit(this.peek)); assert(isDigit(this.peek));
var simple:boolean = (this.index === start); var simple:boolean = (this.index === start);
this.advance(); // Skip initial digit. this.advance(); // Skip initial digit.
@ -315,18 +315,18 @@ export class Scanner {
} }
var str:string = this.input.substring(start, this.index); var str:string = this.input.substring(start, this.index);
// TODO // TODO
var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseDouble(str); var value:number = simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseFloat(str);
return newNumberToken(start, value); return newNumberToken(start, value);
} }
scanString():Token { scanString():Token {
assert(this.peek == $SQ || this.peek == $DQ); assert(this.peek == $SQ || this.peek == $DQ);
var start:number/*int*/ = this.index; var start:int = this.index;
var quote:number/*int*/ = this.peek; var quote:int = this.peek;
this.advance(); // Skip initial quote. this.advance(); // Skip initial quote.
var buffer:StringJoiner; //ckck var buffer:StringJoiner; //ckck
var marker:number/*int*/ = this.index; var marker:int = this.index;
var input:string = this.input; var input:string = this.input;
while (this.peek != quote) { while (this.peek != quote) {
@ -334,12 +334,12 @@ export class Scanner {
if (buffer == null) buffer = new StringJoiner(); if (buffer == null) buffer = new StringJoiner();
buffer.add(input.substring(marker, this.index)); buffer.add(input.substring(marker, this.index));
this.advance(); this.advance();
var unescapedCode:number/*int*/; var unescapedCode:int;
if (this.peek == $u) { if (this.peek == $u) {
// 4 character hex code for unicode character. // 4 character hex code for unicode character.
var hex:string = input.substring(this.index + 1, this.index + 5); var hex:string = input.substring(this.index + 1, this.index + 5);
unescapedCode = NumberWrapper.parseInt(hex, 16); unescapedCode = NumberWrapper.parseInt(hex, 16);
for (var i:number/*int*/ = 0; i < 5; i++) { for (var i:int = 0; i < 5; i++) {
this.advance(); this.advance();
} }
} else { } else {
@ -369,23 +369,23 @@ export class Scanner {
} }
// Reports a scanning failure by throwing a string that names `message`, the
// column (this.index + this.offset) and the expression text.
// NOTE(review): `${input}` is a bare identifier — this method declares no
// `input` parameter or local, so presumably `this.input` was intended.
// Confirm before relying on the message.
// NOTE(review): a plain string is thrown rather than an Error object, so the
// thrown value itself carries no stack trace.
// NOTE(review): the message still says "Lexer Error" although the commit
// renames the module to scanner — confirm whether the text should follow.
error(message:string) { error(message:string) {
var position:number/*int*/ = this.index + this.offset; var position:int = this.index + this.offset;
throw `Lexer Error: ${message} at column ${position} in expression [${input}]`; throw `Lexer Error: ${message} at column ${position} in expression [${input}]`;
} }
} }
// True when `code` lies in the inclusive range $TAB..$SPACE or equals $NBSP.
function isWhitespace(code:number/*int*/):boolean { function isWhitespace(code:int):boolean {
return (code >= $TAB && code <= $SPACE) || (code == $NBSP); return (code >= $TAB && code <= $SPACE) || (code == $NBSP);
} }
// True when `code` may begin an identifier: within $a..$z or $A..$Z
// (inclusive), or equal to $_ or $$. Digits are not accepted here.
// Presumably these constants are the ASCII letter, underscore and dollar
// codes — their definitions are outside this view.
function isIdentifierStart(code:number/*int*/):boolean { function isIdentifierStart(code:int):boolean {
return ($a <= code && code <= $z) || return ($a <= code && code <= $z) ||
($A <= code && code <= $Z) || ($A <= code && code <= $Z) ||
(code == $_) || (code == $_) ||
(code == $$); (code == $$);
} }
function isIdentifierPart(code:number/*int*/):boolean { function isIdentifierPart(code:int):boolean {
return ($a <= code && code <= $z) || return ($a <= code && code <= $z) ||
($A <= code && code <= $Z) || ($A <= code && code <= $Z) ||
($0 <= code && code <= $9) || ($0 <= code && code <= $9) ||
@ -393,19 +393,19 @@ function isIdentifierPart(code:number/*int*/):boolean {
(code == $$); (code == $$);
} }
// True when `code` lies in the inclusive range $0..$9.
function isDigit(code:number/*int*/):boolean { function isDigit(code:int):boolean {
return $0 <= code && code <= $9; return $0 <= code && code <= $9;
} }
// True when `code` equals $e or $E, i.e. the marker that introduces an
// exponent part.
function isExponentStart(code:number/*int*/):boolean { function isExponentStart(code:int):boolean {
return code == $e || code == $E; return code == $e || code == $E;
} }
// True when `code` equals $MINUS or $PLUS, the two signs accepted on an
// exponent.
function isExponentSign(code:number/*int*/):boolean { function isExponentSign(code:int):boolean {
return code == $MINUS || code == $PLUS; return code == $MINUS || code == $PLUS;
} }
function unescape(code:number/*int*/):number/*int*/ { function unescape(code:int):int {
switch(code) { switch(code) {
case $n: return $LF; case $n: return $LF;
case $f: return $FF; case $f: return $FF;
@ -445,16 +445,4 @@ var KEYWORDS = SetWrapper.createFromList([
'undefined', 'undefined',
'true', 'true',
'false', 'false',
]); ]);
/**
 * Tokenizes `text`: creates a Scanner over it and collects every token that
 * scanToken() yields, in scan order, stopping at the first null result.
 * (This function is removed by the commit; the test file gains an equivalent
 * local helper.)
 */
export function Lexer(text:string):List {
  var scanner:Scanner = new Scanner(text);
  var tokens:List<Token> = [];
  while (true) {
    var token:Token = scanner.scanToken();
    // A null token marks the end of the input.
    if (token == null) break;
    ListWrapper.push(tokens, token);
  }
  return tokens;
}

View File

@ -1,9 +1,20 @@
import {describe, it, expect} from 'test_lib/test_lib'; import {describe, it, expect} from 'test_lib/test_lib';
import {Lexer, Scanner, Token} from 'change_detection/parser/lexer'; import {Scanner, Token} from 'change_detection/parser/scanner';
import {DOM} from 'facade/dom'; import {DOM} from 'facade/dom';
import {List, ListWrapper} from "facade/collection"; import {List, ListWrapper} from "facade/collection";
import {StringWrapper} from "facade/lang"; import {StringWrapper} from "facade/lang";
/**
 * Test helper: scans `text` with a fresh Scanner and returns all tokens in
 * the order scanToken() produced them. Scanning stops at the first null
 * token.
 */
function lex(text:string):List {
  var scanner:Scanner = new Scanner(text);
  var tokens:List<Token> = [];
  for (var token:Token = scanner.scanToken(); token != null; token = scanner.scanToken()) {
    ListWrapper.push(tokens, token);
  }
  return tokens;
}
function expectToken(token, index) { function expectToken(token, index) {
expect(token instanceof Token).toBe(true); expect(token instanceof Token).toBe(true);
expect(token.index).toEqual(index); expect(token.index).toEqual(index);
@ -46,16 +57,16 @@ function expectKeywordToken(token, index, keyword) {
export function main() { export function main() {
describe('lexer', function() { describe('scanner', function() {
describe('token', function() { describe('token', function() {
it('should tokenize a simple identifier', function() { it('should tokenize a simple identifier', function() {
var tokens:List<int> = Lexer("j"); var tokens:List<int> = lex("j");
expect(tokens.length).toEqual(1); expect(tokens.length).toEqual(1);
expectIdentifierToken(tokens[0], 0, 'j'); expectIdentifierToken(tokens[0], 0, 'j');
}); });
it('should tokenize a dotted identifier', function() { it('should tokenize a dotted identifier', function() {
var tokens:List<int> = Lexer("j.k"); var tokens:List<int> = lex("j.k");
expect(tokens.length).toEqual(3); expect(tokens.length).toEqual(3);
expectIdentifierToken(tokens[0], 0, 'j'); expectIdentifierToken(tokens[0], 0, 'j');
expectCharacterToken (tokens[1], 1, '.'); expectCharacterToken (tokens[1], 1, '.');
@ -63,34 +74,34 @@ export function main() {
}); });
it('should tokenize an operator', function() { it('should tokenize an operator', function() {
var tokens:List<int> = Lexer("j-k"); var tokens:List<int> = lex("j-k");
expect(tokens.length).toEqual(3); expect(tokens.length).toEqual(3);
expectOperatorToken(tokens[1], 1, '-'); expectOperatorToken(tokens[1], 1, '-');
}); });
it('should tokenize an indexed operator', function() { it('should tokenize an indexed operator', function() {
var tokens:List<int> = Lexer("j[k]"); var tokens:List<int> = lex("j[k]");
expect(tokens.length).toEqual(4); expect(tokens.length).toEqual(4);
expectCharacterToken(tokens[1], 1, "["); expectCharacterToken(tokens[1], 1, "[");
expectCharacterToken(tokens[3], 3, "]"); expectCharacterToken(tokens[3], 3, "]");
}); });
it('should tokenize numbers', function() { it('should tokenize numbers', function() {
var tokens:List<int> = Lexer("88"); var tokens:List<int> = lex("88");
expect(tokens.length).toEqual(1); expect(tokens.length).toEqual(1);
expectNumberToken(tokens[0], 0, 88); expectNumberToken(tokens[0], 0, 88);
}); });
it('should tokenize numbers within index ops', function() { it('should tokenize numbers within index ops', function() {
expectNumberToken(Lexer("a[22]")[2], 2, 22); expectNumberToken(lex("a[22]")[2], 2, 22);
}); });
it('should tokenize simple quoted strings', function() { it('should tokenize simple quoted strings', function() {
expectStringToken(Lexer('"a"')[0], 0, "a"); expectStringToken(lex('"a"')[0], 0, "a");
}); });
it('should tokenize quoted strings with escaped quotes', function() { it('should tokenize quoted strings with escaped quotes', function() {
expectStringToken(Lexer('"a\\""')[0], 0, 'a"'); expectStringToken(lex('"a\\""')[0], 0, 'a"');
}); });
}); });