diff options
Diffstat (limited to 'src/cmd/compile/internal/syntax/scanner_test.go')
| -rw-r--r-- | src/cmd/compile/internal/syntax/scanner_test.go | 354 |
1 files changed, 354 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go new file mode 100644 index 0000000000..69e81aceca --- /dev/null +++ b/src/cmd/compile/internal/syntax/scanner_test.go @@ -0,0 +1,354 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "fmt" + "os" + "testing" +) + +func TestScanner(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + src, err := os.Open("parser.go") + if err != nil { + t.Fatal(err) + } + defer src.Close() + + var s scanner + s.init(src, nil) + for { + s.next() + if s.tok == _EOF { + break + } + switch s.tok { + case _Name: + fmt.Println(s.line, s.tok, "=>", s.lit) + case _Operator: + fmt.Println(s.line, s.tok, "=>", s.op, s.prec) + default: + fmt.Println(s.line, s.tok) + } + } +} + +func TestTokens(t *testing.T) { + // make source + var buf []byte + for i, s := range sampleTokens { + buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation + buf = append(buf, s.src...) // token + buf = append(buf, " "[:i&7]...) // trailing spaces + buf = append(buf, "/* foo */ // bar\n"...) // comments + } + + // scan source + var got scanner + got.init(&bytesReader{buf}, nil) + got.next() + for i, want := range sampleTokens { + nlsemi := false + + if got.line != i+1 { + t.Errorf("got line %d; want %d", got.line, i+1) + } + + if got.tok != want.tok { + t.Errorf("got tok = %s; want %s", got.tok, want.tok) + continue + } + + switch want.tok { + case _Name, _Literal: + if got.lit != want.src { + t.Errorf("got lit = %q; want %q", got.lit, want.src) + continue + } + nlsemi = true + + case _Operator, _AssignOp, _IncOp: + if got.op != want.op { + t.Errorf("got op = %s; want %s", got.op, want.op) + continue + } + if got.prec != want.prec { + t.Errorf("got prec = %s; want %s", got.prec, want.prec) + continue + } + nlsemi = want.tok == _IncOp + + case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return: + nlsemi = true + } + + if nlsemi { + got.next() + if got.tok != _Semi { + t.Errorf("got tok = %s; want ;", got.tok) + continue + } + } + + got.next() + } + + if got.tok != _EOF { + t.Errorf("got %q; want _EOF", got.tok) + } +} + +var sampleTokens = [...]struct { + tok token + src string + op Operator + prec int +}{ + // name samples + {_Name, "x", 0, 0}, + {_Name, "X123", 0, 0}, + {_Name, "foo", 0, 0}, + {_Name, "Foo123", 0, 0}, + {_Name, "foo_bar", 0, 0}, + {_Name, "_", 0, 0}, + {_Name, "_foobar", 0, 0}, + {_Name, "a۰۱۸", 0, 0}, + {_Name, "foo६४", 0, 0}, + {_Name, "bar9876", 0, 0}, + {_Name, "ŝ", 0, 0}, + {_Name, "ŝfoo", 0, 0}, + + // literal samples + {_Literal, "0", 0, 0}, + {_Literal, "1", 0, 0}, + {_Literal, "12345", 0, 0}, + {_Literal, "123456789012345678890123456789012345678890", 0, 0}, + {_Literal, "01234567", 0, 0}, + {_Literal, "0x0", 0, 0}, + {_Literal, "0xcafebabe", 0, 0}, + {_Literal, "0.", 0, 0}, + {_Literal, "0.e0", 0, 0}, + {_Literal, "0.e-1", 0, 0}, + {_Literal, "0.e+123", 0, 0}, + {_Literal, ".0", 0, 0}, + {_Literal, ".0E00", 0, 0}, + {_Literal, ".0E-0123", 0, 0}, + {_Literal, ".0E+12345678901234567890", 0, 0}, + {_Literal, ".45e1", 0, 0}, + {_Literal, "3.14159265", 0, 0}, + {_Literal, "1e0", 0, 0}, + {_Literal, "1e+100", 0, 0}, + {_Literal, "1e-100", 0, 0}, + {_Literal, "2.71828e-1000", 0, 0}, + {_Literal, "0i", 0, 0}, + {_Literal, "1i", 0, 0}, + {_Literal, "012345678901234567889i", 0, 0}, + {_Literal, "123456789012345678890i", 0, 0}, + {_Literal, "0.i", 0, 0}, + {_Literal, ".0i", 0, 0}, + {_Literal, "3.14159265i", 0, 0}, + {_Literal, "1e0i", 0, 0}, + {_Literal, "1e+100i", 0, 0}, + {_Literal, "1e-100i", 0, 0}, + {_Literal, "2.71828e-1000i", 0, 0}, + {_Literal, "'a'", 0, 0}, + {_Literal, "'\\000'", 0, 0}, + {_Literal, "'\\xFF'", 0, 0}, + {_Literal, "'\\uff16'", 0, 0}, + {_Literal, "'\\U0000ff16'", 0, 0}, + {_Literal, "`foobar`", 0, 0}, + {_Literal, "`foo\tbar`", 0, 0}, + {_Literal, "`\r`", 0, 0}, + + // operators + {_Operator, "||", OrOr, precOrOr}, + + {_Operator, "&&", AndAnd, precAndAnd}, + + {_Operator, "==", Eql, precCmp}, + {_Operator, "!=", Neq, precCmp}, + {_Operator, "<", Lss, precCmp}, + {_Operator, "<=", Leq, precCmp}, + {_Operator, ">", Gtr, precCmp}, + {_Operator, ">=", Geq, precCmp}, + + {_Operator, "+", Add, precAdd}, + {_Operator, "-", Sub, precAdd}, + {_Operator, "|", Or, precAdd}, + {_Operator, "^", Xor, precAdd}, + + {_Star, "*", Mul, precMul}, + {_Operator, "/", Div, precMul}, + {_Operator, "%", Rem, precMul}, + {_Operator, "&", And, precMul}, + {_Operator, "&^", AndNot, precMul}, + {_Operator, "<<", Shl, precMul}, + {_Operator, ">>", Shr, precMul}, + + // assignment operations + {_AssignOp, "+=", Add, precAdd}, + {_AssignOp, "-=", Sub, precAdd}, + {_AssignOp, "|=", Or, precAdd}, + {_AssignOp, "^=", Xor, precAdd}, + + {_AssignOp, "*=", Mul, precMul}, + {_AssignOp, "/=", Div, precMul}, + {_AssignOp, "%=", Rem, precMul}, + {_AssignOp, "&=", And, precMul}, + {_AssignOp, "&^=", AndNot, precMul}, + {_AssignOp, "<<=", Shl, precMul}, + {_AssignOp, ">>=", Shr, precMul}, + + // other operations + {_IncOp, "++", Add, precAdd}, + {_IncOp, "--", Sub, precAdd}, + {_Assign, "=", 0, 0}, + {_Define, ":=", 0, 0}, + {_Arrow, "<-", 0, 0}, + + // delimiters + {_Lparen, "(", 0, 0}, + {_Lbrack, "[", 0, 0}, + {_Lbrace, "{", 0, 0}, + {_Rparen, ")", 0, 0}, + {_Rbrack, "]", 0, 0}, + {_Rbrace, "}", 0, 0}, + {_Comma, ",", 0, 0}, + {_Semi, ";", 0, 0}, + {_Colon, ":", 0, 0}, + {_Dot, ".", 0, 0}, + {_DotDotDot, "...", 0, 0}, + + // keywords + {_Break, "break", 0, 0}, + {_Case, "case", 0, 0}, + {_Chan, "chan", 0, 0}, + {_Const, "const", 0, 0}, + {_Continue, "continue", 0, 0}, + {_Default, "default", 0, 0}, + {_Defer, "defer", 0, 0}, + {_Else, "else", 0, 0}, + {_Fallthrough, "fallthrough", 0, 0}, + {_For, "for", 0, 0}, + {_Func, "func", 0, 0}, + {_Go, "go", 0, 0}, + {_Goto, "goto", 0, 0}, + {_If, "if", 0, 0}, + {_Import, "import", 0, 0}, + {_Interface, "interface", 0, 0}, + {_Map, "map", 0, 0}, + {_Package, "package", 0, 0}, + {_Range, "range", 0, 0}, + {_Return, "return", 0, 0}, + {_Select, "select", 0, 0}, + {_Struct, "struct", 0, 0}, + {_Switch, "switch", 0, 0}, + {_Type, "type", 0, 0}, + {_Var, "var", 0, 0}, +} + +func TestScanErrors(t *testing.T) { + for _, test := range []struct { + src, msg string + pos, line int + }{ + // Note: Positions for lexical errors are the earliest position + // where the error is apparent, not the beginning of the respective + // token. + + // rune-level errors + {"fo\x00o", "invalid NUL character", 2, 1}, + {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2}, + {"foo\n\n\xff ", "invalid UTF-8 encoding", 5, 3}, + + // token-level errors + {"x + ~y", "bitwise complement operator is ^", 4, 1}, + {"foo$bar = 0", "invalid rune '$'", 3, 1}, + {"const x = 0xyz", "malformed hex constant", 12, 1}, + {"0123456789", "malformed octal constant", 10, 1}, + {"0123456789. /* foobar", "comment not terminated", 12, 1}, // valid float constant + {"0123456789e0 /*\nfoobar", "comment not terminated", 13, 1}, // valid float constant + {"var a, b = 08, 07\n", "malformed octal constant", 13, 1}, + {"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1}, + + {`''`, "empty character literal", 1, 1}, + {"'\n", "newline in character literal", 1, 1}, + {`'\`, "missing '", 2, 1}, + {`'\'`, "missing '", 3, 1}, + {`'\x`, "missing '", 3, 1}, + {`'\x'`, "escape sequence incomplete", 3, 1}, + {`'\y'`, "unknown escape sequence", 2, 1}, + {`'\x0'`, "escape sequence incomplete", 4, 1}, + {`'\00'`, "escape sequence incomplete", 4, 1}, + {`'\377' /*`, "comment not terminated", 7, 1}, // valid octal escape + {`'\378`, "illegal character U+0038 '8' in escape sequence", 4, 1}, + {`'\400'`, "octal escape value > 255: 256", 5, 1}, + {`'xx`, "missing '", 2, 1}, + + {"\"\n", "newline in string", 1, 1}, + {`"`, "string not terminated", 0, 1}, + {`"foo`, "string not terminated", 0, 1}, + {"`", "string not terminated", 0, 1}, + {"`foo", "string not terminated", 0, 1}, + {"/*/", "comment not terminated", 0, 1}, + {"/*\n\nfoo", "comment not terminated", 0, 1}, + {"/*\n\nfoo", "comment not terminated", 0, 1}, + {`"\`, "string not terminated", 0, 1}, + {`"\"`, "string not terminated", 0, 1}, + {`"\x`, "string not terminated", 0, 1}, + {`"\x"`, "escape sequence incomplete", 3, 1}, + {`"\y"`, "unknown escape sequence", 2, 1}, + {`"\x0"`, "escape sequence incomplete", 4, 1}, + {`"\00"`, "escape sequence incomplete", 4, 1}, + {`"\377" /*`, "comment not terminated", 7, 1}, // valid octal escape + {`"\378"`, "illegal character U+0038 '8' in escape sequence", 4, 1}, + {`"\400"`, "octal escape value > 255: 256", 5, 1}, + + {`s := "foo\z"`, "unknown escape sequence", 10, 1}, + {`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1}, + {`"\x`, "string not terminated", 0, 1}, + {`"\x"`, "escape sequence incomplete", 3, 1}, + {`var s string = "\x"`, "escape sequence incomplete", 18, 1}, + {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1}, + + // former problem cases + {"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3}, + } { + var s scanner + nerrors := 0 + s.init(&bytesReader{[]byte(test.src)}, func(pos, line int, msg string) { + nerrors++ + // only check the first error + if nerrors == 1 { + if msg != test.msg { + t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg) + } + if pos != test.pos { + t.Errorf("%q: got pos = %d; want %d", test.src, pos, test.pos) + } + if line != test.line { + t.Errorf("%q: got line = %d; want %d", test.src, line, test.line) + } + } else if nerrors > 1 { + t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, msg, pos, line) + } + }) + + for { + s.next() + if s.tok == _EOF { + break + } + } + + if nerrors == 0 { + t.Errorf("%q: got no error; want %q", test.src, test.msg) + } + } +} |
