July 2000 Draft
JavaScript 2.0
Formal Description
Lexer Semantics
|
Monday, December 6, 1999
The lexer semantics describe the actions the lexer takes in order to transform an input stream of Unicode characters into a stream of tokens. For convenience, the lexer grammar is repeated here. See also the description of the semantic notation.
This document is also available as a Word 98 rtf file.
The start symbols are:
NextTokenunit if the previous token was a number;
NextTokenre if the previous token was not a number and a / should be interpreted as a regular expression; and
NextTokendiv if the previous token was not a number and a / should be interpreted as a division or division-assignment operator.
type SemanticException = oneof {syntaxError}
«TAB»
| «VT»
| «FF»
| «SP»
| «u00A0»
| «u2000»
| «u2001»
| «u2002»
| «u2003»
| «u2004»
| «u2005»
| «u2006»
| «u2007»
| «u2008»
| «u2009»
| «u200A»
| «u200B»
| «u3000»
action DecimalValue[ASCIIDigit] : Integer = digitValue(ASCIIDigit)
action Token[NextToken] : Token
Token[NextTokenre WhiteSpace Tokenre] = Token[Tokenre]
Token[NextTokendiv WhiteSpace Tokendiv] = Token[Tokendiv]
Token[NextTokenunit [lookahead∉{OrdinaryContinuingIdentifierCharacter, \}] WhiteSpace Tokendiv]
= Token[Tokendiv]
Token[NextTokenunit [lookahead∉{_}] IdentifierName] = string Name[IdentifierName]
Token[NextTokenunit _ IdentifierName] = string Name[IdentifierName]
type RegExp = tuple {reBody: String; reFlags: String}
type Quantity = tuple {amount: Double; unit: String}
type Token
= oneof {
lineBreak;
identifier: String;
keyword: String;
punctuator: String;
number: Double;
string: String;
regularExpression: RegExp;
end}
Token[Token LineBreaks] = lineBreak
Token[Token IdentifierOrReservedWord] = Token[IdentifierOrReservedWord]
Token[Token Punctuator] = punctuator Punctuator[Punctuator]
Token[Tokendiv DivisionPunctuator] = punctuator Punctuator[DivisionPunctuator]
Token[Token NumericLiteral] = number DoubleValue[NumericLiteral]
Token[Token StringLiteral] = string StringValue[StringLiteral]
Token[Tokenre RegExpLiteral] = regularExpression REValue[RegExpLiteral]
Token[Token EndOfInput] = end
action Name[IdentifierName] : String
Name[IdentifierName InitialIdentifierCharacter]
= [CharacterValue[InitialIdentifierCharacter]]
Name[IdentifierName IdentifierName1 ContinuingIdentifierCharacter]
= Name[IdentifierName1] ⊕ [CharacterValue[ContinuingIdentifierCharacter]]
action ContainsEscapes[IdentifierName] : Boolean
ContainsEscapes[IdentifierName InitialIdentifierCharacter]
= ContainsEscapes[InitialIdentifierCharacter]
ContainsEscapes[IdentifierName IdentifierName1 ContinuingIdentifierCharacter]
= ContainsEscapes[IdentifierName1] or ContainsEscapes[ContinuingIdentifierCharacter]
action CharacterValue[InitialIdentifierCharacter] : Character
CharacterValue[InitialIdentifierCharacter OrdinaryInitialIdentifierCharacter]
= OrdinaryInitialIdentifierCharacter
CharacterValue[InitialIdentifierCharacter \ HexEscape]
= if isOrdinaryInitialIdentifierCharacter(CharacterValue[HexEscape])
then CharacterValue[HexEscape]
else throw syntaxError
action ContainsEscapes[InitialIdentifierCharacter] : Boolean
ContainsEscapes[InitialIdentifierCharacter OrdinaryInitialIdentifierCharacter] = false
ContainsEscapes[InitialIdentifierCharacter \ HexEscape] = true
action CharacterValue[ContinuingIdentifierCharacter] : Character
CharacterValue[ContinuingIdentifierCharacter OrdinaryContinuingIdentifierCharacter]
= OrdinaryContinuingIdentifierCharacter
CharacterValue[ContinuingIdentifierCharacter \ HexEscape]
= if isOrdinaryContinuingIdentifierCharacter(CharacterValue[HexEscape])
then CharacterValue[HexEscape]
else throw syntaxError
action ContainsEscapes[ContinuingIdentifierCharacter] : Boolean
ContainsEscapes[ContinuingIdentifierCharacter OrdinaryContinuingIdentifierCharacter]
= false
ContainsEscapes[ContinuingIdentifierCharacter \ HexEscape] = true
reservedWords : String[]
= [“abstract”, “break”, “case”, “catch”, “class”, “const”, “continue”, “debugger”,
“default”, “delete”, “do”, “else”, “enum”, “eval”, “export”, “extends”, “false”, “final”,
“finally”, “for”, “function”, “goto”, “if”, “implements”, “import”, “in”, “instanceof”,
“native”, “new”, “null”, “package”, “private”, “protected”, “public”, “return”, “static”,
“super”, “switch”, “synchronized”, “this”, “throw”, “throws”, “transient”, “true”, “try”,
“typeof”, “var”, “volatile”, “while”, “with”]
nonReservedWords : String[]
= [“box”, “constructor”, “field”, “get”, “language”, “local”, “method”, “override”, “set”,
“version”]
keywords : String[] = reservedWords ⊕ nonReservedWords
member(id: String, list: String[]) : Boolean
= if |list| = 0
then false
else if id = list[0]
then true
else member(id, list[1 ...])
action Token[IdentifierOrReservedWord] : Token
Token[IdentifierOrReservedWord IdentifierName]
= let id: String = Name[IdentifierName]
in if member(id, keywords) and not ContainsEscapes[IdentifierName]
then keyword id
else identifier id
!
| !=
| !==
| #
| %
| %=
| &
| &&
| &&=
| &=
| (
| )
| *
| *=
| +
| ++
| +=
| ,
| -
| --
| -=
| ->
| .
| ..
| ...
| :
| ::
| ;
| <
| <<
| <<=
| <=
| =
| ==
| ===
| >
| >=
| >>
| >>=
| >>>
| >>>=
| ?
| @
| [
| ]
| ^
| ^=
| ^^
| ^^=
| {
| |
| |=
| ||
| ||=
| }
| ~
action Punctuator[Punctuator] : String
Punctuator[Punctuator !] = “!”
Punctuator[Punctuator !=] = “!=”
Punctuator[Punctuator !==] = “!==”
Punctuator[Punctuator #] = “#”
Punctuator[Punctuator %] = “%”
Punctuator[Punctuator %=] = “%=”
Punctuator[Punctuator &] = “&”
Punctuator[Punctuator &&] = “&&”
Punctuator[Punctuator &&=] = “&&=”
Punctuator[Punctuator &=] = “&=”
Punctuator[Punctuator (] = “(”
Punctuator[Punctuator )] = “)”
Punctuator[Punctuator *] = “*”
Punctuator[Punctuator *=] = “*=”
Punctuator[Punctuator +] = “+”
Punctuator[Punctuator ++] = “++”
Punctuator[Punctuator +=] = “+=”
Punctuator[Punctuator ,] = “,”
Punctuator[Punctuator -] = “-”
Punctuator[Punctuator --] = “--”
Punctuator[Punctuator -=] = “-=”
Punctuator[Punctuator ->] = “->”
Punctuator[Punctuator .] = “.”
Punctuator[Punctuator ..] = “..”
Punctuator[Punctuator ...] = “...”
Punctuator[Punctuator :] = “:”
Punctuator[Punctuator ::] = “::”
Punctuator[Punctuator ;] = “;”
Punctuator[Punctuator <] = “<”
Punctuator[Punctuator <<] = “<<”
Punctuator[Punctuator <<=] = “<<=”
Punctuator[Punctuator <=] = “<=”
Punctuator[Punctuator =] = “=”
Punctuator[Punctuator ==] = “==”
Punctuator[Punctuator ===] = “===”
Punctuator[Punctuator >] = “>”
Punctuator[Punctuator >=] = “>=”
Punctuator[Punctuator >>] = “>>”
Punctuator[Punctuator >>=] = “>>=”
Punctuator[Punctuator >>>] = “>>>”
Punctuator[Punctuator >>>=] = “>>>=”
Punctuator[Punctuator ?] = “?”
Punctuator[Punctuator @] = “@”
Punctuator[Punctuator [] = “[”
Punctuator[Punctuator ]] = “]”
Punctuator[Punctuator ^] = “^”
Punctuator[Punctuator ^=] = “^=”
Punctuator[Punctuator ^^] = “^^”
Punctuator[Punctuator ^^=] = “^^=”
Punctuator[Punctuator {] = “{”
Punctuator[Punctuator |] = “|”
Punctuator[Punctuator |=] = “|=”
Punctuator[Punctuator ||] = “||”
Punctuator[Punctuator ||=] = “||=”
Punctuator[Punctuator }] = “}”
Punctuator[Punctuator ~] = “~”
action Punctuator[DivisionPunctuator] : String
Punctuator[DivisionPunctuator / [lookahead∉{/, *}]] = “/”
Punctuator[DivisionPunctuator /=] = “/=”
action DoubleValue[NumericLiteral] : Double
DoubleValue[NumericLiteral DecimalLiteral]
= rationalToDouble(RationalValue[DecimalLiteral])
DoubleValue[NumericLiteral HexIntegerLiteral [lookahead∉{HexDigit}]]
= rationalToDouble(IntegerValue[HexIntegerLiteral])
expt(base: Rational, exponent: Integer) : Rational
= if exponent = 0
then 1
else if exponent < 0
then 1/expt(base, -exponent)
else base*expt(base, exponent - 1)
. Fraction
action RationalValue[DecimalLiteral] : Rational
RationalValue[DecimalLiteral Mantissa] = RationalValue[Mantissa]
RationalValue[DecimalLiteral Mantissa LetterE SignedInteger]
= RationalValue[Mantissa]*expt(10, IntegerValue[SignedInteger])
action RationalValue[Mantissa] : Rational
RationalValue[Mantissa DecimalIntegerLiteral] = IntegerValue[DecimalIntegerLiteral]
RationalValue[Mantissa DecimalIntegerLiteral .] = IntegerValue[DecimalIntegerLiteral]
RationalValue[Mantissa DecimalIntegerLiteral . Fraction]
= IntegerValue[DecimalIntegerLiteral] + RationalValue[Fraction]
RationalValue[Mantissa . Fraction] = RationalValue[Fraction]
action IntegerValue[DecimalIntegerLiteral] : Integer
IntegerValue[DecimalIntegerLiteral 0] = 0
IntegerValue[DecimalIntegerLiteral NonZeroDecimalDigits]
= IntegerValue[NonZeroDecimalDigits]
action IntegerValue[NonZeroDecimalDigits] : Integer
IntegerValue[NonZeroDecimalDigits NonZeroDigit] = DecimalValue[NonZeroDigit]
IntegerValue[NonZeroDecimalDigits NonZeroDecimalDigits1 ASCIIDigit]
= 10*IntegerValue[NonZeroDecimalDigits1] + DecimalValue[ASCIIDigit]
action DecimalValue[NonZeroDigit] : Integer = digitValue(NonZeroDigit)
action RationalValue[Fraction] : Rational
RationalValue[Fraction DecimalDigits]
= IntegerValue[DecimalDigits]/expt(10, NDigits[DecimalDigits])
action IntegerValue[SignedInteger] : Integer
IntegerValue[SignedInteger DecimalDigits] = IntegerValue[DecimalDigits]
IntegerValue[SignedInteger + DecimalDigits] = IntegerValue[DecimalDigits]
IntegerValue[SignedInteger - DecimalDigits] = -IntegerValue[DecimalDigits]
action IntegerValue[DecimalDigits] : Integer
IntegerValue[DecimalDigits ASCIIDigit] = DecimalValue[ASCIIDigit]
IntegerValue[DecimalDigits DecimalDigits1 ASCIIDigit]
= 10*IntegerValue[DecimalDigits1] + DecimalValue[ASCIIDigit]
action NDigits[DecimalDigits] : Integer
NDigits[DecimalDigits ASCIIDigit] = 1
NDigits[DecimalDigits DecimalDigits1 ASCIIDigit] = NDigits[DecimalDigits1] + 1
action IntegerValue[HexIntegerLiteral] : Integer
IntegerValue[HexIntegerLiteral 0 LetterX HexDigit] = HexValue[HexDigit]
IntegerValue[HexIntegerLiteral HexIntegerLiteral1 HexDigit]
= 16*IntegerValue[HexIntegerLiteral1] + HexValue[HexDigit]
action HexValue[HexDigit] : Integer = digitValue(HexDigit)
action StringValue[StringLiteral] : String
StringValue[StringLiteral ' StringCharssingle '] = StringValue[StringCharssingle]
StringValue[StringLiteral " StringCharsdouble "] = StringValue[StringCharsdouble]
action StringValue[StringChars] : String
StringValue[StringChars «empty»] = “”
StringValue[StringChars StringChars1 StringChar]
= StringValue[StringChars1] ⊕ [CharacterValue[StringChar]]
action CharacterValue[StringChar] : Character
CharacterValue[StringChar LiteralStringChar] = LiteralStringChar
CharacterValue[StringChar \ StringEscape] = CharacterValue[StringEscape]
action CharacterValue[StringEscape] : Character
CharacterValue[StringEscape ControlEscape] = CharacterValue[ControlEscape]
CharacterValue[StringEscape ZeroEscape] = CharacterValue[ZeroEscape]
CharacterValue[StringEscape HexEscape] = CharacterValue[HexEscape]
CharacterValue[StringEscape IdentityEscape] = IdentityEscape
action CharacterValue[ControlEscape] : Character
CharacterValue[ControlEscape b] = ‘«BS»’
CharacterValue[ControlEscape f] = ‘«FF»’
CharacterValue[ControlEscape n] = ‘«LF»’
CharacterValue[ControlEscape r] = ‘«CR»’
CharacterValue[ControlEscape t] = ‘«TAB»’
CharacterValue[ControlEscape v] = ‘«VT»’
action CharacterValue[ZeroEscape] : Character
CharacterValue[ZeroEscape 0 [lookahead∉{ASCIIDigit}]] = ‘«NUL»’
action CharacterValue[HexEscape] : Character
CharacterValue[HexEscape x HexDigit1 HexDigit2]
= codeToCharacter(16*HexValue[HexDigit1] + HexValue[HexDigit2])
CharacterValue[HexEscape u HexDigit1 HexDigit2 HexDigit3 HexDigit4]
= codeToCharacter(
4096*HexValue[HexDigit1] + 256*HexValue[HexDigit2] + 16*HexValue[HexDigit3] + HexValue[HexDigit4])
action REValue[RegExpLiteral] : RegExp
REValue[RegExpLiteral RegExpBody RegExpFlags]
= reBody REBody[RegExpBody], reFlags REFlags[RegExpFlags]
action REFlags[RegExpFlags] : String
REFlags[RegExpFlags «empty»] = “”
REFlags[RegExpFlags RegExpFlags1 ContinuingIdentifierCharacter]
= REFlags[RegExpFlags1] ⊕ [CharacterValue[ContinuingIdentifierCharacter]]
action REBody[RegExpBody] : String
REBody[RegExpBody / [lookahead∉{*}] RegExpChars /] = REBody[RegExpChars]
action REBody[RegExpChars] : String
REBody[RegExpChars RegExpChar] = REBody[RegExpChar]
REBody[RegExpChars RegExpChars1 RegExpChar]
= REBody[RegExpChars1] ⊕ REBody[RegExpChar]
action REBody[RegExpChar] : String
REBody[RegExpChar OrdinaryRegExpChar] = [OrdinaryRegExpChar]
REBody[RegExpChar \ NonTerminator] = [‘\’, NonTerminator]
Waldemar Horwat Last modified Monday, December 6, 1999 |