From e58a77809d5d31317f64bfc5b8a96e2fb093bae4 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Tue, 11 Oct 2011 17:43:10 -0700 Subject: ebnf, ebnflint: move under exp pkg/ebnf -> pkg/exp/ebnf cmd/ebnflint -> pkg/exp/ebnflint R=golang-dev, r CC=golang-dev https://golang.org/cl/5188042 --- src/pkg/Makefile | 6 +- src/pkg/ebnf/Makefile | 12 -- src/pkg/ebnf/ebnf.go | 269 --------------------------------------- src/pkg/ebnf/ebnf_test.go | 71 ----------- src/pkg/ebnf/parser.go | 191 --------------------------- src/pkg/exp/ebnf/Makefile | 12 ++ src/pkg/exp/ebnf/ebnf.go | 269 +++++++++++++++++++++++++++++++++++++++ src/pkg/exp/ebnf/ebnf_test.go | 71 +++++++++++ src/pkg/exp/ebnf/parser.go | 191 +++++++++++++++++++++++++++ src/pkg/exp/ebnflint/Makefile | 15 +++ src/pkg/exp/ebnflint/doc.go | 22 ++++ src/pkg/exp/ebnflint/ebnflint.go | 109 ++++++++++++++++ 12 files changed, 692 insertions(+), 546 deletions(-) delete mode 100644 src/pkg/ebnf/Makefile delete mode 100644 src/pkg/ebnf/ebnf.go delete mode 100644 src/pkg/ebnf/ebnf_test.go delete mode 100644 src/pkg/ebnf/parser.go create mode 100644 src/pkg/exp/ebnf/Makefile create mode 100644 src/pkg/exp/ebnf/ebnf.go create mode 100644 src/pkg/exp/ebnf/ebnf_test.go create mode 100644 src/pkg/exp/ebnf/parser.go create mode 100644 src/pkg/exp/ebnflint/Makefile create mode 100644 src/pkg/exp/ebnflint/doc.go create mode 100644 src/pkg/exp/ebnflint/ebnflint.go (limited to 'src/pkg') diff --git a/src/pkg/Makefile b/src/pkg/Makefile index e833fcfbba..321b463b13 100644 --- a/src/pkg/Makefile +++ b/src/pkg/Makefile @@ -68,7 +68,6 @@ DIRS=\ debug/elf\ debug/gosym\ debug/pe\ - ebnf\ encoding/ascii85\ encoding/base32\ encoding/base64\ @@ -78,6 +77,8 @@ DIRS=\ encoding/pem\ exec\ exp/datafmt\ + exp/ebnf\ + exp/ebnflint\ exp/gui\ exp/gui/x11\ exp/norm\ @@ -173,7 +174,6 @@ DIRS=\ websocket\ xml\ ../cmd/cgo\ - ../cmd/ebnflint\ ../cmd/godoc\ ../cmd/gofix\ ../cmd/gofmt\ @@ -201,6 +201,7 @@ NOTEST+=\ crypto\ crypto/openpgp/error\ crypto/x509/pkix\ + exp/ebnflint\ exp/gui\ exp/gui/x11\ exp/sql/driver\ @@ -220,7 +221,6 @@ NOTEST+=\ testing\ testing/iotest\ ../cmd/cgo\ - ../cmd/ebnflint\ ../cmd/godoc\ ../cmd/gotest\ ../cmd/goyacc\ diff --git a/src/pkg/ebnf/Makefile b/src/pkg/ebnf/Makefile deleted file mode 100644 index f5555d2720..0000000000 --- a/src/pkg/ebnf/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -include ../../Make.inc - -TARG=ebnf -GOFILES=\ - ebnf.go\ - parser.go\ - -include ../../Make.pkg diff --git a/src/pkg/ebnf/ebnf.go b/src/pkg/ebnf/ebnf.go deleted file mode 100644 index 2ec7f00800..0000000000 --- a/src/pkg/ebnf/ebnf.go +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package ebnf is a library for EBNF grammars. The input is text ([]byte) -// satisfying the following grammar (represented itself in EBNF): -// -// Production = name "=" [ Expression ] "." . -// Expression = Alternative { "|" Alternative } . -// Alternative = Term { Term } . -// Term = name | token [ "…" token ] | Group | Option | Repetition . -// Group = "(" Expression ")" . -// Option = "[" Expression "]" . -// Repetition = "{" Expression "}" . -// -// A name is a Go identifier, a token is a Go string, and comments -// and white space follow the same rules as for the Go language. -// Production names starting with an uppercase Unicode letter denote -// non-terminal productions (i.e., productions which allow white-space -// and comments between tokens); all other production names denote -// lexical productions. -// -package ebnf - -import ( - "fmt" - "os" - "scanner" - "unicode" - "utf8" -) - -// ---------------------------------------------------------------------------- -// Error handling - -type errorList []os.Error - -func (list errorList) Error() os.Error { - if len(list) == 0 { - return nil - } - return list -} - -func (list errorList) String() string { - switch len(list) { - case 0: - return "no errors" - case 1: - return list[0].String() - } - return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1) -} - -func newError(pos scanner.Position, msg string) os.Error { - return os.NewError(fmt.Sprintf("%s: %s", pos, msg)) -} - -// ---------------------------------------------------------------------------- -// Internal representation - -type ( - // An Expression node represents a production expression. - Expression interface { - // Pos is the position of the first character of the syntactic construct - Pos() scanner.Position - } - - // An Alternative node represents a non-empty list of alternative expressions. - Alternative []Expression // x | y | z - - // A Sequence node represents a non-empty list of sequential expressions. - Sequence []Expression // x y z - - // A Name node represents a production name. - Name struct { - StringPos scanner.Position - String string - } - - // A Token node represents a literal. - Token struct { - StringPos scanner.Position - String string - } - - // A List node represents a range of characters. - Range struct { - Begin, End *Token // begin ... end - } - - // A Group node represents a grouped expression. - Group struct { - Lparen scanner.Position - Body Expression // (body) - } - - // An Option node represents an optional expression. - Option struct { - Lbrack scanner.Position - Body Expression // [body] - } - - // A Repetition node represents a repeated expression. - Repetition struct { - Lbrace scanner.Position - Body Expression // {body} - } - - // A Production node represents an EBNF production. - Production struct { - Name *Name - Expr Expression - } - - // A Bad node stands for pieces of source code that lead to a parse error. - Bad struct { - TokPos scanner.Position - Error string // parser error message - } - - // A Grammar is a set of EBNF productions. The map - // is indexed by production name. - // - Grammar map[string]*Production -) - -func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative -func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences -func (x *Name) Pos() scanner.Position { return x.StringPos } -func (x *Token) Pos() scanner.Position { return x.StringPos } -func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } -func (x *Group) Pos() scanner.Position { return x.Lparen } -func (x *Option) Pos() scanner.Position { return x.Lbrack } -func (x *Repetition) Pos() scanner.Position { return x.Lbrace } -func (x *Production) Pos() scanner.Position { return x.Name.Pos() } -func (x *Bad) Pos() scanner.Position { return x.TokPos } - -// ---------------------------------------------------------------------------- -// Grammar verification - -func isLexical(name string) bool { - ch, _ := utf8.DecodeRuneInString(name) - return !unicode.IsUpper(ch) -} - -type verifier struct { - errors errorList - worklist []*Production - reached Grammar // set of productions reached from (and including) the root production - grammar Grammar -} - -func (v *verifier) error(pos scanner.Position, msg string) { - v.errors = append(v.errors, newError(pos, msg)) -} - -func (v *verifier) push(prod *Production) { - name := prod.Name.String - if _, found := v.reached[name]; !found { - v.worklist = append(v.worklist, prod) - v.reached[name] = prod - } -} - -func (v *verifier) verifyChar(x *Token) int { - s := x.String - if utf8.RuneCountInString(s) != 1 { - v.error(x.Pos(), "single char expected, found "+s) - return 0 - } - ch, _ := utf8.DecodeRuneInString(s) - return ch -} - -func (v *verifier) verifyExpr(expr Expression, lexical bool) { - switch x := expr.(type) { - case nil: - // empty expression - case Alternative: - for _, e := range x { - v.verifyExpr(e, lexical) - } - case Sequence: - for _, e := range x { - v.verifyExpr(e, lexical) - } - case *Name: - // a production with this name must exist; - // add it to the worklist if not yet processed - if prod, found := v.grammar[x.String]; found { - v.push(prod) - } else { - v.error(x.Pos(), "missing production "+x.String) - } - // within a lexical production references - // to non-lexical productions are invalid - if lexical && !isLexical(x.String) { - v.error(x.Pos(), "reference to non-lexical production "+x.String) - } - case *Token: - // nothing to do for now - case *Range: - i := v.verifyChar(x.Begin) - j := v.verifyChar(x.End) - if i >= j { - v.error(x.Pos(), "decreasing character range") - } - case *Group: - v.verifyExpr(x.Body, lexical) - case *Option: - v.verifyExpr(x.Body, lexical) - case *Repetition: - v.verifyExpr(x.Body, lexical) - case *Bad: - v.error(x.Pos(), x.Error) - default: - panic(fmt.Sprintf("internal error: unexpected type %T", expr)) - } -} - -func (v *verifier) verify(grammar Grammar, start string) { - // find root production - root, found := grammar[start] - if !found { - var noPos scanner.Position - v.error(noPos, "no start production "+start) - return - } - - // initialize verifier - v.worklist = v.worklist[0:0] - v.reached = make(Grammar) - v.grammar = grammar - - // work through the worklist - v.push(root) - for { - n := len(v.worklist) - 1 - if n < 0 { - break - } - prod := v.worklist[n] - v.worklist = v.worklist[0:n] - v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) - } - - // check if all productions were reached - if len(v.reached) < len(v.grammar) { - for name, prod := range v.grammar { - if _, found := v.reached[name]; !found { - v.error(prod.Pos(), name+" is unreachable") - } - } - } -} - -// Verify checks that: -// - all productions used are defined -// - all productions defined are used when beginning at start -// - lexical productions refer only to other lexical productions -// -// Position information is interpreted relative to the file set fset. -// -func Verify(grammar Grammar, start string) os.Error { - var v verifier - v.verify(grammar, start) - return v.errors.Error() -} diff --git a/src/pkg/ebnf/ebnf_test.go b/src/pkg/ebnf/ebnf_test.go deleted file mode 100644 index 8cfd6b9c37..0000000000 --- a/src/pkg/ebnf/ebnf_test.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package ebnf - -import ( - "bytes" - "testing" -) - -var goodGrammars = []string{ - `Program = .`, - - `Program = foo . - foo = "foo" .`, - - `Program = "a" | "b" "c" .`, - - `Program = "a" … "z" .`, - - `Program = Song . - Song = { Note } . - Note = Do | (Re | Mi | Fa | So | La) | Ti . - Do = "c" . - Re = "d" . - Mi = "e" . - Fa = "f" . - So = "g" . - La = "a" . - Ti = ti . - ti = "b" .`, -} - -var badGrammars = []string{ - `Program = | .`, - `Program = | b .`, - `Program = a … b .`, - `Program = "a" … .`, - `Program = … "b" .`, - `Program = () .`, - `Program = [] .`, - `Program = {} .`, -} - -func checkGood(t *testing.T, src string) { - grammar, err := Parse("", bytes.NewBuffer([]byte(src))) - if err != nil { - t.Errorf("Parse(%s) failed: %v", src, err) - return - } - if err = Verify(grammar, "Program"); err != nil { - t.Errorf("Verify(%s) failed: %v", src, err) - } -} - -func checkBad(t *testing.T, src string) { - _, err := Parse("", bytes.NewBuffer([]byte(src))) - if err == nil { - t.Errorf("Parse(%s) should have failed", src) - } -} - -func TestGrammars(t *testing.T) { - for _, src := range goodGrammars { - checkGood(t, src) - } - for _, src := range badGrammars { - checkBad(t, src) - } -} diff --git a/src/pkg/ebnf/parser.go b/src/pkg/ebnf/parser.go deleted file mode 100644 index 2dbbefb751..0000000000 --- a/src/pkg/ebnf/parser.go +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package ebnf - -import ( - "io" - "os" - "scanner" - "strconv" -) - -type parser struct { - errors errorList - scanner scanner.Scanner - pos scanner.Position // token position - tok int // one token look-ahead - lit string // token literal -} - -func (p *parser) next() { - p.tok = p.scanner.Scan() - p.pos = p.scanner.Position - p.lit = p.scanner.TokenText() -} - -func (p *parser) error(pos scanner.Position, msg string) { - p.errors = append(p.errors, newError(pos, msg)) -} - -func (p *parser) errorExpected(pos scanner.Position, msg string) { - msg = `expected "` + msg + `"` - if pos.Offset == p.pos.Offset { - // the error happened at the current position; - // make the error message more specific - msg += ", found " + scanner.TokenString(p.tok) - if p.tok < 0 { - msg += " " + p.lit - } - } - p.error(pos, msg) -} - -func (p *parser) expect(tok int) scanner.Position { - pos := p.pos - if p.tok != tok { - p.errorExpected(pos, scanner.TokenString(tok)) - } - p.next() // make progress in any case - return pos -} - -func (p *parser) parseIdentifier() *Name { - pos := p.pos - name := p.lit - p.expect(scanner.Ident) - return &Name{pos, name} -} - -func (p *parser) parseToken() *Token { - pos := p.pos - value := "" - if p.tok == scanner.String { - value, _ = strconv.Unquote(p.lit) - // Unquote may fail with an error, but only if the scanner found - // an illegal string in the first place. In this case the error - // has already been reported. - p.next() - } else { - p.expect(scanner.String) - } - return &Token{pos, value} -} - -// ParseTerm returns nil if no term was found. -func (p *parser) parseTerm() (x Expression) { - pos := p.pos - - switch p.tok { - case scanner.Ident: - x = p.parseIdentifier() - - case scanner.String: - tok := p.parseToken() - x = tok - const ellipsis = '…' // U+2026, the horizontal ellipsis character - if p.tok == ellipsis { - p.next() - x = &Range{tok, p.parseToken()} - } - - case '(': - p.next() - x = &Group{pos, p.parseExpression()} - p.expect(')') - - case '[': - p.next() - x = &Option{pos, p.parseExpression()} - p.expect(']') - - case '{': - p.next() - x = &Repetition{pos, p.parseExpression()} - p.expect('}') - } - - return x -} - -func (p *parser) parseSequence() Expression { - var list Sequence - - for x := p.parseTerm(); x != nil; x = p.parseTerm() { - list = append(list, x) - } - - // no need for a sequence if list.Len() < 2 - switch len(list) { - case 0: - p.errorExpected(p.pos, "term") - return &Bad{p.pos, "term expected"} - case 1: - return list[0] - } - - return list -} - -func (p *parser) parseExpression() Expression { - var list Alternative - - for { - list = append(list, p.parseSequence()) - if p.tok != '|' { - break - } - p.next() - } - // len(list) > 0 - - // no need for an Alternative node if list.Len() < 2 - if len(list) == 1 { - return list[0] - } - - return list -} - -func (p *parser) parseProduction() *Production { - name := p.parseIdentifier() - p.expect('=') - var expr Expression - if p.tok != '.' { - expr = p.parseExpression() - } - p.expect('.') - return &Production{name, expr} -} - -func (p *parser) parse(filename string, src io.Reader) Grammar { - p.scanner.Init(src) - p.scanner.Filename = filename - p.next() // initializes pos, tok, lit - - grammar := make(Grammar) - for p.tok != scanner.EOF { - prod := p.parseProduction() - name := prod.Name.String - if _, found := grammar[name]; !found { - grammar[name] = prod - } else { - p.error(prod.Pos(), name+" declared already") - } - } - - return grammar -} - -// Parse parses a set of EBNF productions from source src. -// It returns a set of productions. Errors are reported -// for incorrect syntax and if a production is declared -// more than once; the filename is used only for error -// positions. -// -func Parse(filename string, src io.Reader) (Grammar, os.Error) { - var p parser - grammar := p.parse(filename, src) - return grammar, p.errors.Error() -} diff --git a/src/pkg/exp/ebnf/Makefile b/src/pkg/exp/ebnf/Makefile new file mode 100644 index 0000000000..844de675cb --- /dev/null +++ b/src/pkg/exp/ebnf/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=exp/ebnf +GOFILES=\ + ebnf.go\ + parser.go\ + +include ../../../Make.pkg diff --git a/src/pkg/exp/ebnf/ebnf.go b/src/pkg/exp/ebnf/ebnf.go new file mode 100644 index 0000000000..2ec7f00800 --- /dev/null +++ b/src/pkg/exp/ebnf/ebnf.go @@ -0,0 +1,269 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ebnf is a library for EBNF grammars. The input is text ([]byte) +// satisfying the following grammar (represented itself in EBNF): +// +// Production = name "=" [ Expression ] "." . +// Expression = Alternative { "|" Alternative } . +// Alternative = Term { Term } . +// Term = name | token [ "…" token ] | Group | Option | Repetition . +// Group = "(" Expression ")" . +// Option = "[" Expression "]" . +// Repetition = "{" Expression "}" . +// +// A name is a Go identifier, a token is a Go string, and comments +// and white space follow the same rules as for the Go language. +// Production names starting with an uppercase Unicode letter denote +// non-terminal productions (i.e., productions which allow white-space +// and comments between tokens); all other production names denote +// lexical productions. +// +package ebnf + +import ( + "fmt" + "os" + "scanner" + "unicode" + "utf8" +) + +// ---------------------------------------------------------------------------- +// Error handling + +type errorList []os.Error + +func (list errorList) Error() os.Error { + if len(list) == 0 { + return nil + } + return list +} + +func (list errorList) String() string { + switch len(list) { + case 0: + return "no errors" + case 1: + return list[0].String() + } + return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1) +} + +func newError(pos scanner.Position, msg string) os.Error { + return os.NewError(fmt.Sprintf("%s: %s", pos, msg)) +} + +// ---------------------------------------------------------------------------- +// Internal representation + +type ( + // An Expression node represents a production expression. + Expression interface { + // Pos is the position of the first character of the syntactic construct + Pos() scanner.Position + } + + // An Alternative node represents a non-empty list of alternative expressions. + Alternative []Expression // x | y | z + + // A Sequence node represents a non-empty list of sequential expressions. + Sequence []Expression // x y z + + // A Name node represents a production name. + Name struct { + StringPos scanner.Position + String string + } + + // A Token node represents a literal. + Token struct { + StringPos scanner.Position + String string + } + + // A List node represents a range of characters. + Range struct { + Begin, End *Token // begin ... end + } + + // A Group node represents a grouped expression. + Group struct { + Lparen scanner.Position + Body Expression // (body) + } + + // An Option node represents an optional expression. + Option struct { + Lbrack scanner.Position + Body Expression // [body] + } + + // A Repetition node represents a repeated expression. + Repetition struct { + Lbrace scanner.Position + Body Expression // {body} + } + + // A Production node represents an EBNF production. + Production struct { + Name *Name + Expr Expression + } + + // A Bad node stands for pieces of source code that lead to a parse error. + Bad struct { + TokPos scanner.Position + Error string // parser error message + } + + // A Grammar is a set of EBNF productions. The map + // is indexed by production name. + // + Grammar map[string]*Production +) + +func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative +func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences +func (x *Name) Pos() scanner.Position { return x.StringPos } +func (x *Token) Pos() scanner.Position { return x.StringPos } +func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } +func (x *Group) Pos() scanner.Position { return x.Lparen } +func (x *Option) Pos() scanner.Position { return x.Lbrack } +func (x *Repetition) Pos() scanner.Position { return x.Lbrace } +func (x *Production) Pos() scanner.Position { return x.Name.Pos() } +func (x *Bad) Pos() scanner.Position { return x.TokPos } + +// ---------------------------------------------------------------------------- +// Grammar verification + +func isLexical(name string) bool { + ch, _ := utf8.DecodeRuneInString(name) + return !unicode.IsUpper(ch) +} + +type verifier struct { + errors errorList + worklist []*Production + reached Grammar // set of productions reached from (and including) the root production + grammar Grammar +} + +func (v *verifier) error(pos scanner.Position, msg string) { + v.errors = append(v.errors, newError(pos, msg)) +} + +func (v *verifier) push(prod *Production) { + name := prod.Name.String + if _, found := v.reached[name]; !found { + v.worklist = append(v.worklist, prod) + v.reached[name] = prod + } +} + +func (v *verifier) verifyChar(x *Token) int { + s := x.String + if utf8.RuneCountInString(s) != 1 { + v.error(x.Pos(), "single char expected, found "+s) + return 0 + } + ch, _ := utf8.DecodeRuneInString(s) + return ch +} + +func (v *verifier) verifyExpr(expr Expression, lexical bool) { + switch x := expr.(type) { + case nil: + // empty expression + case Alternative: + for _, e := range x { + v.verifyExpr(e, lexical) + } + case Sequence: + for _, e := range x { + v.verifyExpr(e, lexical) + } + case *Name: + // a production with this name must exist; + // add it to the worklist if not yet processed + if prod, found := v.grammar[x.String]; found { + v.push(prod) + } else { + v.error(x.Pos(), "missing production "+x.String) + } + // within a lexical production references + // to non-lexical productions are invalid + if lexical && !isLexical(x.String) { + v.error(x.Pos(), "reference to non-lexical production "+x.String) + } + case *Token: + // nothing to do for now + case *Range: + i := v.verifyChar(x.Begin) + j := v.verifyChar(x.End) + if i >= j { + v.error(x.Pos(), "decreasing character range") + } + case *Group: + v.verifyExpr(x.Body, lexical) + case *Option: + v.verifyExpr(x.Body, lexical) + case *Repetition: + v.verifyExpr(x.Body, lexical) + case *Bad: + v.error(x.Pos(), x.Error) + default: + panic(fmt.Sprintf("internal error: unexpected type %T", expr)) + } +} + +func (v *verifier) verify(grammar Grammar, start string) { + // find root production + root, found := grammar[start] + if !found { + var noPos scanner.Position + v.error(noPos, "no start production "+start) + return + } + + // initialize verifier + v.worklist = v.worklist[0:0] + v.reached = make(Grammar) + v.grammar = grammar + + // work through the worklist + v.push(root) + for { + n := len(v.worklist) - 1 + if n < 0 { + break + } + prod := v.worklist[n] + v.worklist = v.worklist[0:n] + v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) + } + + // check if all productions were reached + if len(v.reached) < len(v.grammar) { + for name, prod := range v.grammar { + if _, found := v.reached[name]; !found { + v.error(prod.Pos(), name+" is unreachable") + } + } + } +} + +// Verify checks that: +// - all productions used are defined +// - all productions defined are used when beginning at start +// - lexical productions refer only to other lexical productions +// +// Position information is interpreted relative to the file set fset. +// +func Verify(grammar Grammar, start string) os.Error { + var v verifier + v.verify(grammar, start) + return v.errors.Error() +} diff --git a/src/pkg/exp/ebnf/ebnf_test.go b/src/pkg/exp/ebnf/ebnf_test.go new file mode 100644 index 0000000000..8cfd6b9c37 --- /dev/null +++ b/src/pkg/exp/ebnf/ebnf_test.go @@ -0,0 +1,71 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ebnf + +import ( + "bytes" + "testing" +) + +var goodGrammars = []string{ + `Program = .`, + + `Program = foo . + foo = "foo" .`, + + `Program = "a" | "b" "c" .`, + + `Program = "a" … "z" .`, + + `Program = Song . + Song = { Note } . + Note = Do | (Re | Mi | Fa | So | La) | Ti . + Do = "c" . + Re = "d" . + Mi = "e" . + Fa = "f" . + So = "g" . + La = "a" . + Ti = ti . + ti = "b" .`, +} + +var badGrammars = []string{ + `Program = | .`, + `Program = | b .`, + `Program = a … b .`, + `Program = "a" … .`, + `Program = … "b" .`, + `Program = () .`, + `Program = [] .`, + `Program = {} .`, +} + +func checkGood(t *testing.T, src string) { + grammar, err := Parse("", bytes.NewBuffer([]byte(src))) + if err != nil { + t.Errorf("Parse(%s) failed: %v", src, err) + return + } + if err = Verify(grammar, "Program"); err != nil { + t.Errorf("Verify(%s) failed: %v", src, err) + } +} + +func checkBad(t *testing.T, src string) { + _, err := Parse("", bytes.NewBuffer([]byte(src))) + if err == nil { + t.Errorf("Parse(%s) should have failed", src) + } +} + +func TestGrammars(t *testing.T) { + for _, src := range goodGrammars { + checkGood(t, src) + } + for _, src := range badGrammars { + checkBad(t, src) + } +} diff --git a/src/pkg/exp/ebnf/parser.go b/src/pkg/exp/ebnf/parser.go new file mode 100644 index 0000000000..2dbbefb751 --- /dev/null +++ b/src/pkg/exp/ebnf/parser.go @@ -0,0 +1,191 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ebnf + +import ( + "io" + "os" + "scanner" + "strconv" +) + +type parser struct { + errors errorList + scanner scanner.Scanner + pos scanner.Position // token position + tok int // one token look-ahead + lit string // token literal +} + +func (p *parser) next() { + p.tok = p.scanner.Scan() + p.pos = p.scanner.Position + p.lit = p.scanner.TokenText() +} + +func (p *parser) error(pos scanner.Position, msg string) { + p.errors = append(p.errors, newError(pos, msg)) +} + +func (p *parser) errorExpected(pos scanner.Position, msg string) { + msg = `expected "` + msg + `"` + if pos.Offset == p.pos.Offset { + // the error happened at the current position; + // make the error message more specific + msg += ", found " + scanner.TokenString(p.tok) + if p.tok < 0 { + msg += " " + p.lit + } + } + p.error(pos, msg) +} + +func (p *parser) expect(tok int) scanner.Position { + pos := p.pos + if p.tok != tok { + p.errorExpected(pos, scanner.TokenString(tok)) + } + p.next() // make progress in any case + return pos +} + +func (p *parser) parseIdentifier() *Name { + pos := p.pos + name := p.lit + p.expect(scanner.Ident) + return &Name{pos, name} +} + +func (p *parser) parseToken() *Token { + pos := p.pos + value := "" + if p.tok == scanner.String { + value, _ = strconv.Unquote(p.lit) + // Unquote may fail with an error, but only if the scanner found + // an illegal string in the first place. In this case the error + // has already been reported. + p.next() + } else { + p.expect(scanner.String) + } + return &Token{pos, value} +} + +// ParseTerm returns nil if no term was found. +func (p *parser) parseTerm() (x Expression) { + pos := p.pos + + switch p.tok { + case scanner.Ident: + x = p.parseIdentifier() + + case scanner.String: + tok := p.parseToken() + x = tok + const ellipsis = '…' // U+2026, the horizontal ellipsis character + if p.tok == ellipsis { + p.next() + x = &Range{tok, p.parseToken()} + } + + case '(': + p.next() + x = &Group{pos, p.parseExpression()} + p.expect(')') + + case '[': + p.next() + x = &Option{pos, p.parseExpression()} + p.expect(']') + + case '{': + p.next() + x = &Repetition{pos, p.parseExpression()} + p.expect('}') + } + + return x +} + +func (p *parser) parseSequence() Expression { + var list Sequence + + for x := p.parseTerm(); x != nil; x = p.parseTerm() { + list = append(list, x) + } + + // no need for a sequence if list.Len() < 2 + switch len(list) { + case 0: + p.errorExpected(p.pos, "term") + return &Bad{p.pos, "term expected"} + case 1: + return list[0] + } + + return list +} + +func (p *parser) parseExpression() Expression { + var list Alternative + + for { + list = append(list, p.parseSequence()) + if p.tok != '|' { + break + } + p.next() + } + // len(list) > 0 + + // no need for an Alternative node if list.Len() < 2 + if len(list) == 1 { + return list[0] + } + + return list +} + +func (p *parser) parseProduction() *Production { + name := p.parseIdentifier() + p.expect('=') + var expr Expression + if p.tok != '.' { + expr = p.parseExpression() + } + p.expect('.') + return &Production{name, expr} +} + +func (p *parser) parse(filename string, src io.Reader) Grammar { + p.scanner.Init(src) + p.scanner.Filename = filename + p.next() // initializes pos, tok, lit + + grammar := make(Grammar) + for p.tok != scanner.EOF { + prod := p.parseProduction() + name := prod.Name.String + if _, found := grammar[name]; !found { + grammar[name] = prod + } else { + p.error(prod.Pos(), name+" declared already") + } + } + + return grammar +} + +// Parse parses a set of EBNF productions from source src. +// It returns a set of productions. Errors are reported +// for incorrect syntax and if a production is declared +// more than once; the filename is used only for error +// positions. +// +func Parse(filename string, src io.Reader) (Grammar, os.Error) { + var p parser + grammar := p.parse(filename, src) + return grammar, p.errors.Error() +} diff --git a/src/pkg/exp/ebnflint/Makefile b/src/pkg/exp/ebnflint/Makefile new file mode 100644 index 0000000000..2057b07d58 --- /dev/null +++ b/src/pkg/exp/ebnflint/Makefile @@ -0,0 +1,15 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=ebnflint +GOFILES=\ + ebnflint.go\ + +include ../../../Make.cmd + +test: $(TARG) + $(TARG) -start="SourceFile" "$(GOROOT)"/doc/go_spec.html + diff --git a/src/pkg/exp/ebnflint/doc.go b/src/pkg/exp/ebnflint/doc.go new file mode 100644 index 0000000000..f35976eea7 --- /dev/null +++ b/src/pkg/exp/ebnflint/doc.go @@ -0,0 +1,22 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + +Ebnflint verifies that EBNF productions are consistent and gramatically correct. +It reads them from an HTML document such as the Go specification. + +Grammar productions are grouped in boxes demarcated by the HTML elements +
+	
+ + +Usage: + ebnflint [--start production] [file] + +The --start flag specifies the name of the start production for +the grammar; it defaults to "Start". + +*/ +package documentation diff --git a/src/pkg/exp/ebnflint/ebnflint.go b/src/pkg/exp/ebnflint/ebnflint.go new file mode 100644 index 0000000000..c827716c44 --- /dev/null +++ b/src/pkg/exp/ebnflint/ebnflint.go @@ -0,0 +1,109 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "exp/ebnf" + "flag" + "fmt" + "go/scanner" + "go/token" + "io/ioutil" + "os" + "path/filepath" +) + +var fset = token.NewFileSet() +var start = flag.String("start", "Start", "name of start production") + +func usage() { + fmt.Fprintf(os.Stderr, "usage: ebnflint [flags] [filename]\n") + flag.PrintDefaults() + os.Exit(1) +} + +// Markers around EBNF sections in .html files +var ( + open = []byte(`
`)
+	close = []byte(`
`) +) + +func report(err os.Error) { + scanner.PrintError(os.Stderr, err) + os.Exit(1) +} + +func extractEBNF(src []byte) []byte { + var buf bytes.Buffer + + for { + // i = beginning of EBNF text + i := bytes.Index(src, open) + if i < 0 { + break // no EBNF found - we are done + } + i += len(open) + + // write as many newlines as found in the excluded text + // to maintain correct line numbers in error messages + for _, ch := range src[0:i] { + if ch == '\n' { + buf.WriteByte('\n') + } + } + + // j = end of EBNF text (or end of source) + j := bytes.Index(src[i:], close) // close marker + if j < 0 { + j = len(src) - i + } + j += i + + // copy EBNF text + buf.Write(src[i:j]) + + // advance + src = src[j:] + } + + return buf.Bytes() +} + +func main() { + flag.Parse() + + var ( + filename string + src []byte + err os.Error + ) + switch flag.NArg() { + case 0: + filename = "" + src, err = ioutil.ReadAll(os.Stdin) + case 1: + filename = flag.Arg(0) + src, err = ioutil.ReadFile(filename) + default: + usage() + } + if err != nil { + report(err) + } + + if filepath.Ext(filename) == ".html" || bytes.Index(src, open) >= 0 { + src = extractEBNF(src) + } + + grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src)) + if err != nil { + report(err) + } + + if err = ebnf.Verify(grammar, *start); err != nil { + report(err) + } +} -- cgit v1.3-5-g9baa