|
July 2000 Draft
JavaScript 2.0
Formal Description
Lexer Semantics
|
Monday, December 6, 1999
The lexer semantics describe the actions the lexer takes in order to transform an input stream of Unicode characters into a stream of tokens. For convenience, the lexer grammar is repeated here. See also the description of the semantic notation.
This document is also available as a Word 98 RTF file.
The start symbols are:
NextTokenunit if the previous token was a number;
NextTokenre if the previous token was not a number and a / should be interpreted as a regular expression; and
NextTokendiv if the previous token was not a number and a / should be interpreted as a division or division-assignment operator.
type SemanticException = oneof {syntaxError}
«TAB» | «VT» | «FF» | «SP» | «u00A0» | «u2000» | «u2001» | «u2002» | «u2003» | «u2004» | «u2005» | «u2006» | «u2007» | «u2008» | «u2009» | «u200A» | «u200B» | «u3000»
action DecimalValue[ASCIIDigit] : Integer = digitValue(ASCIIDigit)
action Token[NextToken] : Token
Token[NextTokenre WhiteSpace Tokenre] = Token[Tokenre]
Token[NextTokendiv WhiteSpace Tokendiv] = Token[Tokendiv]
Token[NextTokenunit [lookahead∉{OrdinaryContinuingIdentifierCharacter, \}] WhiteSpace Tokendiv]
= Token[Tokendiv]
Token[NextTokenunit [lookahead∉{_}] IdentifierName] = string Name[IdentifierName]
Token[NextTokenunit _ IdentifierName] = string Name[IdentifierName]
type RegExp = tuple {reBody: String; reFlags: String}
type Quantity = tuple {amount: Double; unit: String}
type Token
= oneof {
lineBreak;
identifier: String;
keyword: String;
punctuator: String;
number: Double;
string: String;
regularExpression: RegExp;
end}
Token[Token LineBreaks] = lineBreak
Token[Token IdentifierOrReservedWord] = Token[IdentifierOrReservedWord]
Token[Token Punctuator] = punctuator Punctuator[Punctuator]
Token[Tokendiv DivisionPunctuator] = punctuator Punctuator[DivisionPunctuator]
Token[Token NumericLiteral] = number DoubleValue[NumericLiteral]
Token[Token StringLiteral] = string StringValue[StringLiteral]
Token[Tokenre RegExpLiteral] = regularExpression REValue[RegExpLiteral]
Token[Token EndOfInput] = end
action Name[IdentifierName] : String
Name[IdentifierName InitialIdentifierCharacter]
= [CharacterValue[InitialIdentifierCharacter]]
Name[IdentifierName IdentifierName1 ContinuingIdentifierCharacter]
= Name[IdentifierName1] ⊕ [CharacterValue[ContinuingIdentifierCharacter]]
action ContainsEscapes[IdentifierName] : Boolean
ContainsEscapes[IdentifierName InitialIdentifierCharacter]
= ContainsEscapes[InitialIdentifierCharacter]
ContainsEscapes[IdentifierName IdentifierName1 ContinuingIdentifierCharacter]
= ContainsEscapes[IdentifierName1] or ContainsEscapes[ContinuingIdentifierCharacter]
action CharacterValue[InitialIdentifierCharacter] : Character
CharacterValue[InitialIdentifierCharacter OrdinaryInitialIdentifierCharacter]
= OrdinaryInitialIdentifierCharacter
CharacterValue[InitialIdentifierCharacter \ HexEscape]
= if isOrdinaryInitialIdentifierCharacter(CharacterValue[HexEscape])
then CharacterValue[HexEscape]
else throw syntaxError
action ContainsEscapes[InitialIdentifierCharacter] : Boolean
ContainsEscapes[InitialIdentifierCharacter OrdinaryInitialIdentifierCharacter] = false
ContainsEscapes[InitialIdentifierCharacter \ HexEscape] = true
action CharacterValue[ContinuingIdentifierCharacter] : Character
CharacterValue[ContinuingIdentifierCharacter OrdinaryContinuingIdentifierCharacter]
= OrdinaryContinuingIdentifierCharacter
CharacterValue[ContinuingIdentifierCharacter \ HexEscape]
= if isOrdinaryContinuingIdentifierCharacter(CharacterValue[HexEscape])
then CharacterValue[HexEscape]
else throw syntaxError
action ContainsEscapes[ContinuingIdentifierCharacter] : Boolean
ContainsEscapes[ContinuingIdentifierCharacter OrdinaryContinuingIdentifierCharacter]
= false
ContainsEscapes[ContinuingIdentifierCharacter \ HexEscape] = true
reservedWords : String[]
= [“abstract”,
“break”,
“case”,
“catch”,
“class”,
“const”,
“continue”,
“debugger”,
“default”,
“delete”,
“do”,
“else”,
“enum”,
“eval”,
“export”,
“extends”,
“false”,
“final”,
“finally”,
“for”,
“function”,
“goto”,
“if”,
“implements”,
“import”,
“in”,
“instanceof”,
“native”,
“new”,
“null”,
“package”,
“private”,
“protected”,
“public”,
“return”,
“static”,
“super”,
“switch”,
“synchronized”,
“this”,
“throw”,
“throws”,
“transient”,
“true”,
“try”,
“typeof”,
“var”,
“volatile”,
“while”,
“with”]
nonReservedWords : String[]
= [“box”,
“constructor”,
“field”,
“get”,
“language”,
“local”,
“method”,
“override”,
“set”,
“version”]
keywords : String[] = reservedWords ⊕ nonReservedWords
member(id: String, list: String[]) : Boolean
= if |list| = 0
then false
else if id = list[0]
then true
else member(id, list[1 ...])
action Token[IdentifierOrReservedWord] : Token
Token[IdentifierOrReservedWord IdentifierName]
= let id: String = Name[IdentifierName]
in if member(id, keywords) and not ContainsEscapes[IdentifierName]
then keyword id
else identifier id
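Punctuator ⇒ “!” | “!=” | “!==” | “#” | “%” | “%=” | “&” | “&&” | “&&=” | “&=” | “(” | “)” | “*” | “*=” | “+” | “++” | “+=” | “,” | “-” | “--” | “-=” | “->” | “.” | “..” | “...” | “:” | “::” | “;” | “<” | “<<” | “<<=” | “<=” | “=” | “==” | “===” | “>” | “>=” | “>>” | “>>=” | “>>>” | “>>>=” | “?” | “@” | “[” | “]” | “^” | “^=” | “^^” | “^^=” | “{” | “|” | “|=” | “||” | “||=” | “}” | “~”
action Punctuator[Punctuator] : String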
Punctuator[Punctuator !] = “!”
Punctuator[Punctuator ! =] = “!=”
Punctuator[Punctuator ! = =] = “!==”
Punctuator[Punctuator #] = “#”
Punctuator[Punctuator %] = “%”
Punctuator[Punctuator % =] = “%=”
Punctuator[Punctuator &] = “&”
Punctuator[Punctuator & &] = “&&”
Punctuator[Punctuator & & =] = “&&=”
Punctuator[Punctuator & =] = “&=”
Punctuator[Punctuator (] = “(”
Punctuator[Punctuator )] = “)”
Punctuator[Punctuator *] = “*”
Punctuator[Punctuator * =] = “*=”
Punctuator[Punctuator +] = “+”
Punctuator[Punctuator + +] = “++”
Punctuator[Punctuator + =] = “+=”
Punctuator[Punctuator ,] = “,”
Punctuator[Punctuator -] = “-”
Punctuator[Punctuator - -] = “--”
Punctuator[Punctuator - =] = “-=”
Punctuator[Punctuator - >] = “->”
Punctuator[Punctuator .] = “.”
Punctuator[Punctuator . .] = “..”
Punctuator[Punctuator . . .] = “...”
Punctuator[Punctuator :] = “:”
Punctuator[Punctuator : :] = “::”
Punctuator[Punctuator ;] = “;”
Punctuator[Punctuator <] = “<”
Punctuator[Punctuator < <] = “<<”
Punctuator[Punctuator < < =] = “<<=”
Punctuator[Punctuator < =] = “<=”
Punctuator[Punctuator =] = “=”
Punctuator[Punctuator = =] = “==”
Punctuator[Punctuator = = =] = “===”
Punctuator[Punctuator >] = “>”
Punctuator[Punctuator > =] = “>=”
Punctuator[Punctuator > >] = “>>”
Punctuator[Punctuator > > =] = “>>=”
Punctuator[Punctuator > > >] = “>>>”
Punctuator[Punctuator > > > =] = “>>>=”
Punctuator[Punctuator ?] = “?”
Punctuator[Punctuator @] = “@”
Punctuator[Punctuator [] = “[”
Punctuator[Punctuator ]] = “]”
Punctuator[Punctuator ^] = “^”
Punctuator[Punctuator ^ =] = “^=”
Punctuator[Punctuator ^ ^] = “^^”
Punctuator[Punctuator ^ ^ =] = “^^=”
Punctuator[Punctuator {] = “{”
Punctuator[Punctuator |] = “|”
Punctuator[Punctuator | =] = “|=”
Punctuator[Punctuator | |] = “||”
Punctuator[Punctuator | | =] = “||=”
Punctuator[Punctuator }] = “}”
Punctuator[Punctuator ~] = “~”
action Punctuator[DivisionPunctuator] : String
Punctuator[DivisionPunctuator / [lookahead∉{/, *}]] = “/”
Punctuator[DivisionPunctuator / =] = “/=”
action DoubleValue[NumericLiteral] : Double
DoubleValue[NumericLiteral DecimalLiteral]
= rationalToDouble(RationalValue[DecimalLiteral])
DoubleValue[NumericLiteral HexIntegerLiteral [lookahead∉{HexDigit}]]
= rationalToDouble(IntegerValue[HexIntegerLiteral])
expt(base: Rational, exponent: Integer) : Rational
= if exponent = 0
then 1
else if exponent < 0
then 1/expt(base, -exponent)
else base*expt(base, exponent - 1)
. Fraction
action RationalValue[DecimalLiteral] : Rational
RationalValue[DecimalLiteral Mantissa] = RationalValue[Mantissa]
RationalValue[DecimalLiteral Mantissa LetterE SignedInteger]
= RationalValue[Mantissa]*expt(10, IntegerValue[SignedInteger])
action RationalValue[Mantissa] : Rational
RationalValue[Mantissa DecimalIntegerLiteral] = IntegerValue[DecimalIntegerLiteral]
RationalValue[Mantissa DecimalIntegerLiteral .] = IntegerValue[DecimalIntegerLiteral]
RationalValue[Mantissa DecimalIntegerLiteral . Fraction]
= IntegerValue[DecimalIntegerLiteral] + RationalValue[Fraction]
RationalValue[Mantissa . Fraction] = RationalValue[Fraction]
action IntegerValue[DecimalIntegerLiteral] : Integer
IntegerValue[DecimalIntegerLiteral 0] = 0
IntegerValue[DecimalIntegerLiteral NonZeroDecimalDigits]
= IntegerValue[NonZeroDecimalDigits]
action IntegerValue[NonZeroDecimalDigits] : Integer
IntegerValue[NonZeroDecimalDigits NonZeroDigit] = DecimalValue[NonZeroDigit]
IntegerValue[NonZeroDecimalDigits NonZeroDecimalDigits1 ASCIIDigit]
= 10*IntegerValue[NonZeroDecimalDigits1] + DecimalValue[ASCIIDigit]
action DecimalValue[NonZeroDigit] : Integer = digitValue(NonZeroDigit)
action RationalValue[Fraction] : Rational
RationalValue[Fraction DecimalDigits]
= IntegerValue[DecimalDigits]/expt(10, NDigits[DecimalDigits])
action IntegerValue[SignedInteger] : Integer
IntegerValue[SignedInteger DecimalDigits] = IntegerValue[DecimalDigits]
IntegerValue[SignedInteger + DecimalDigits] = IntegerValue[DecimalDigits]
IntegerValue[SignedInteger - DecimalDigits] = -IntegerValue[DecimalDigits]
action IntegerValue[DecimalDigits] : Integer
IntegerValue[DecimalDigits ASCIIDigit] = DecimalValue[ASCIIDigit]
IntegerValue[DecimalDigits DecimalDigits1 ASCIIDigit]
= 10*IntegerValue[DecimalDigits1] + DecimalValue[ASCIIDigit]
action NDigits[DecimalDigits] : Integer
NDigits[DecimalDigits ASCIIDigit] = 1
NDigits[DecimalDigits DecimalDigits1 ASCIIDigit] = NDigits[DecimalDigits1] + 1
action IntegerValue[HexIntegerLiteral] : Integer
IntegerValue[HexIntegerLiteral 0 LetterX HexDigit] = HexValue[HexDigit]
IntegerValue[HexIntegerLiteral HexIntegerLiteral1 HexDigit]
= 16*IntegerValue[HexIntegerLiteral1] + HexValue[HexDigit]
action HexValue[HexDigit] : Integer = digitValue(HexDigit)
action StringValue[StringLiteral] : String
StringValue[StringLiteral ' StringCharssingle '] = StringValue[StringCharssingle]
StringValue[StringLiteral " StringCharsdouble "] = StringValue[StringCharsdouble]
action StringValue[StringChars] : String
StringValue[StringChars «empty»] = “”
StringValue[StringChars StringChars1 StringChar]
= StringValue[StringChars1] ⊕ [CharacterValue[StringChar]]
action CharacterValue[StringChar] : Character
CharacterValue[StringChar LiteralStringChar] = LiteralStringChar
CharacterValue[StringChar \ StringEscape] = CharacterValue[StringEscape]
action CharacterValue[StringEscape] : Character
CharacterValue[StringEscape ControlEscape] = CharacterValue[ControlEscape]
CharacterValue[StringEscape ZeroEscape] = CharacterValue[ZeroEscape]
CharacterValue[StringEscape HexEscape] = CharacterValue[HexEscape]
CharacterValue[StringEscape IdentityEscape] = IdentityEscape
action CharacterValue[ControlEscape] : Character
CharacterValue[ControlEscape b] = ‘«BS»’
CharacterValue[ControlEscape f] = ‘«FF»’
CharacterValue[ControlEscape n] = ‘«LF»’
CharacterValue[ControlEscape r] = ‘«CR»’
CharacterValue[ControlEscape t] = ‘«TAB»’
CharacterValue[ControlEscape v] = ‘«VT»’
action CharacterValue[ZeroEscape] : Character
CharacterValue[ZeroEscape 0 [lookahead∉{ASCIIDigit}]] = ‘«NUL»’
action CharacterValue[HexEscape] : Character
CharacterValue[HexEscape x HexDigit1 HexDigit2]
= codeToCharacter(16*HexValue[HexDigit1] + HexValue[HexDigit2])
CharacterValue[HexEscape u HexDigit1 HexDigit2 HexDigit3 HexDigit4]
= codeToCharacter(
4096*HexValue[HexDigit1] + 256*HexValue[HexDigit2] + 16*HexValue[HexDigit3] +
HexValue[HexDigit4])
action REValue[RegExpLiteral] : RegExp
REValue[RegExpLiteral RegExpBody RegExpFlags]
= 〈reBody REBody[RegExpBody], reFlags REFlags[RegExpFlags]〉
action REFlags[RegExpFlags] : String
REFlags[RegExpFlags «empty»] = “”
REFlags[RegExpFlags RegExpFlags1 ContinuingIdentifierCharacter]
= REFlags[RegExpFlags1] ⊕ [CharacterValue[ContinuingIdentifierCharacter]]
action REBody[RegExpBody] : String
REBody[RegExpBody / [lookahead∉{*}] RegExpChars /] = REBody[RegExpChars]
action REBody[RegExpChars] : String
REBody[RegExpChars RegExpChar] = REBody[RegExpChar]
REBody[RegExpChars RegExpChars1 RegExpChar]
= REBody[RegExpChars1] ⊕ REBody[RegExpChar]
action REBody[RegExpChar] : String
REBody[RegExpChar OrdinaryRegExpChar] = [OrdinaryRegExpChar]
REBody[RegExpChar \ NonTerminator] = [‘\’, NonTerminator]
|
Waldemar Horwat Last modified Monday, December 6, 1999 |