diff options
| author | Rob Pike <r@golang.org> | 2009-06-09 09:53:44 -0700 |
|---|---|---|
| committer | Rob Pike <r@golang.org> | 2009-06-09 09:53:44 -0700 |
| commit | d90e7cbac65c5792ce312ee82fbe03a5dfc98c6f (patch) | |
| tree | 7032a11d0cac2ae4d3e90f7a189b575b5a50f848 /src/lib/datafmt/parser.go | |
| parent | bf5c0c957c3c3ea9add6cfd51b90c463cb4814b5 (diff) | |
| download | go-d90e7cbac65c5792ce312ee82fbe03a5dfc98c6f.tar.xz | |
mv src/lib to src/pkg
tests: all.bash passes, gobuild still works, godoc still works.
R=rsc
OCL=30096
CL=30102
Diffstat (limited to 'src/lib/datafmt/parser.go')
| -rw-r--r-- | src/lib/datafmt/parser.go | 447 |
1 files changed, 0 insertions, 447 deletions
diff --git a/src/lib/datafmt/parser.go b/src/lib/datafmt/parser.go deleted file mode 100644 index 0d597dcb5f..0000000000 --- a/src/lib/datafmt/parser.go +++ /dev/null @@ -1,447 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package datafmt - -import ( - "container/vector"; - "datafmt"; - "fmt"; - "go/scanner"; - "go/token"; - "io"; - "os"; - "strconv"; - "strings"; -) - -// ---------------------------------------------------------------------------- -// Error handling - -// Error describes an individual error. The position Pos, if valid, -// indicates the format source position the error relates to. The -// error is specified with the Msg string. -// -type Error struct { - Pos token.Position; - Msg string; -} - - -func (e *Error) String() string { - pos := ""; - if e.Pos.IsValid() { - pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column); - } - return pos + e.Msg; -} - - -// An ErrorList is a list of errors encountered during parsing. -type ErrorList []*Error - - -// ErrorList implements SortInterface and the os.Error interface. - -func (p ErrorList) Len() int { return len(p); } -func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } -func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; } - - -func (p ErrorList) String() string { - switch len(p) { - case 0: return "unspecified error"; - case 1: return p[0].String(); - } - return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1); -} - - -// ---------------------------------------------------------------------------- -// Parsing - -type parser struct { - errors vector.Vector; - scanner scanner.Scanner; - pos token.Position; // token position - tok token.Token; // one token look-ahead - lit []byte; // token literal - - packs map [string] string; // PackageName -> ImportPath - rules map [string] expr; // RuleName -> Expression -} - - -func (p *parser) next() { - p.pos, p.tok, p.lit = p.scanner.Scan(); - switch p.tok { - case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT: - // Go keywords for composite types are type names - // returned by reflect. Accept them as identifiers. - p.tok = token.IDENT; // p.lit is already set correctly - } -} - - -func (p *parser) init(src []byte) { - p.errors.Init(0); - p.scanner.Init(src, p, scanner.AllowIllegalChars); // return '@' as token.ILLEGAL w/o error message - p.next(); // initializes pos, tok, lit - p.packs = make(map [string] string); - p.rules = make(map [string] expr); -} - - -// The parser implements scanner.Error. -func (p *parser) Error(pos token.Position, msg string) { - // Don't collect errors that are on the same line as the previous error - // in the hope to reduce the number of spurious errors due to incorrect - // parser synchronization. - if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line { - p.errors.Push(&Error{pos, msg}); - } -} - - -func (p *parser) errorExpected(pos token.Position, msg string) { - msg = "expected " + msg; - if pos.Offset == p.pos.Offset { - // the error happened at the current position; - // make the error message more specific - msg += ", found '" + p.tok.String() + "'"; - if p.tok.IsLiteral() { - msg += " " + string(p.lit); - } - } - p.Error(pos, msg); -} - - -func (p *parser) expect(tok token.Token) token.Position { - pos := p.pos; - if p.tok != tok { - p.errorExpected(pos, "'" + tok.String() + "'"); - } - p.next(); // make progress in any case - return pos; -} - - -func (p *parser) parseIdentifier() string { - name := string(p.lit); - p.expect(token.IDENT); - return name; -} - - -func (p *parser) parseTypeName() (string, bool) { - pos := p.pos; - name, isIdent := p.parseIdentifier(), true; - if p.tok == token.PERIOD { - // got a package name, lookup package - if importPath, found := p.packs[name]; found { - name = importPath; - } else { - p.Error(pos, "package not declared: " + name); - } - p.next(); - name, isIdent = name + "." + p.parseIdentifier(), false; - } - return name, isIdent; -} - - -// Parses a rule name and returns it. If the rule name is -// a package-qualified type name, the package name is resolved. -// The 2nd result value is true iff the rule name consists of a -// single identifier only (and thus could be a package name). -// -func (p *parser) parseRuleName() (string, bool) { - name, isIdent := "", false; - switch p.tok { - case token.IDENT: - name, isIdent = p.parseTypeName(); - case token.DEFAULT: - name = "default"; - p.next(); - case token.QUO: - name = "/"; - p.next(); - default: - p.errorExpected(p.pos, "rule name"); - p.next(); // make progress in any case - } - return name, isIdent; -} - - -func (p *parser) parseString() string { - s := ""; - if p.tok == token.STRING { - var err os.Error; - s, err = strconv.Unquote(string(p.lit)); - // Unquote may fail with an error, but only if the scanner found - // an illegal string in the first place. In this case the error - // has already been reported. - p.next(); - return s; - } else { - p.expect(token.STRING); - } - return s; -} - - -func (p *parser) parseLiteral() literal { - s := io.StringBytes(p.parseString()); - - // A string literal may contain %-format specifiers. To simplify - // and speed up printing of the literal, split it into segments - // that start with "%" possibly followed by a last segment that - // starts with some other character. - var list vector.Vector; - list.Init(0); - i0 := 0; - for i := 0; i < len(s); i++ { - if s[i] == '%' && i+1 < len(s) { - // the next segment starts with a % format - if i0 < i { - // the current segment is not empty, split it off - list.Push(s[i0 : i]); - i0 = i; - } - i++; // skip %; let loop skip over char after % - } - } - // the final segment may start with any character - // (it is empty iff the string is empty) - list.Push(s[i0 : len(s)]); - - // convert list into a literal - lit := make(literal, list.Len()); - for i := 0; i < list.Len(); i++ { - lit[i] = list.At(i).([]byte); - } - - return lit; -} - - -func (p *parser) parseField() expr { - var fname string; - switch p.tok { - case token.ILLEGAL: - if string(p.lit) != "@" { - return nil; - } - fname = "@"; - p.next(); - case token.MUL: - fname = "*"; - p.next(); - case token.IDENT: - fname = p.parseIdentifier(); - default: - return nil; - } - - var ruleName string; - if p.tok == token.COLON { - p.next(); - var _ bool; - ruleName, _ = p.parseRuleName(); - } - - return &field{fname, ruleName}; -} - - -func (p *parser) parseExpression() expr - -func (p *parser) parseOperand() (x expr) { - switch p.tok { - case token.STRING: - x = p.parseLiteral(); - - case token.LPAREN: - p.next(); - x = p.parseExpression(); - if p.tok == token.SHR { - p.next(); - x = &group{x, p.parseExpression()}; - } - p.expect(token.RPAREN); - - case token.LBRACK: - p.next(); - x = &option{p.parseExpression()}; - p.expect(token.RBRACK); - - case token.LBRACE: - p.next(); - x = p.parseExpression(); - var div expr; - if p.tok == token.QUO { - p.next(); - div = p.parseExpression(); - } - x = &repetition{x, div}; - p.expect(token.RBRACE); - - default: - x = p.parseField(); // may be nil - } - - return x; -} - - -func (p *parser) parseSequence() expr { - var list vector.Vector; - list.Init(0); - - for x := p.parseOperand(); x != nil; x = p.parseOperand() { - list.Push(x); - } - - // no need for a sequence if list.Len() < 2 - switch list.Len() { - case 0: return nil; - case 1: return list.At(0).(expr); - } - - // convert list into a sequence - seq := make(sequence, list.Len()); - for i := 0; i < list.Len(); i++ { - seq[i] = list.At(i).(expr); - } - return seq; -} - - -func (p *parser) parseExpression() expr { - var list vector.Vector; - list.Init(0); - - for { - x := p.parseSequence(); - if x != nil { - list.Push(x); - } - if p.tok != token.OR { - break; - } - p.next(); - } - - // no need for an alternatives if list.Len() < 2 - switch list.Len() { - case 0: return nil; - case 1: return list.At(0).(expr); - } - - // convert list into a alternatives - alt := make(alternatives, list.Len()); - for i := 0; i < list.Len(); i++ { - alt[i] = list.At(i).(expr); - } - return alt; -} - - -func (p *parser) parseFormat() { - for p.tok != token.EOF { - pos := p.pos; - - name, isIdent := p.parseRuleName(); - switch p.tok { - case token.STRING: - // package declaration - importPath := p.parseString(); - - // add package declaration - if !isIdent { - p.Error(pos, "illegal package name: " + name); - } else if _, found := p.packs[name]; !found { - p.packs[name] = importPath; - } else { - p.Error(pos, "package already declared: " + name); - } - - case token.ASSIGN: - // format rule - p.next(); - x := p.parseExpression(); - - // add rule - if _, found := p.rules[name]; !found { - p.rules[name] = x; - } else { - p.Error(pos, "format rule already declared: " + name); - } - - default: - p.errorExpected(p.pos, "package declaration or format rule"); - p.next(); // make progress in any case - } - - if p.tok == token.SEMICOLON { - p.next(); - } else { - break; - } - } - p.expect(token.EOF); -} - - -func remap(p *parser, name string) string { - i := strings.Index(name, "."); - if i >= 0 { - packageName, suffix := name[0 : i], name[i : len(name)]; - // lookup package - if importPath, found := p.packs[packageName]; found { - name = importPath + suffix; - } else { - var invalidPos token.Position; - p.Error(invalidPos, "package not declared: " + packageName); - } - } - return name; -} - - -// Parse parses a set of format productions from source src. Custom -// formatters may be provided via a map of formatter functions. If -// there are no errors, the result is a Format and the error is nil. -// Otherwise the format is nil and a non-empty ErrorList is returned. -// -func Parse(src []byte, fmap FormatterMap) (Format, os.Error) { - // parse source - var p parser; - p.init(src); - p.parseFormat(); - - // add custom formatters, if any - for name, form := range fmap { - name = remap(&p, name); - if t, found := p.rules[name]; !found { - p.rules[name] = &custom{name, form}; - } else { - var invalidPos token.Position; - p.Error(invalidPos, "formatter already declared: " + name); - } - } - - // convert errors list, if any - if p.errors.Len() > 0 { - errors := make(ErrorList, p.errors.Len()); - for i := 0; i < p.errors.Len(); i++ { - errors[i] = p.errors.At(i).(*Error); - } - return nil, errors; - } - - return p.rules, nil; -} |
