From e58a77809d5d31317f64bfc5b8a96e2fb093bae4 Mon Sep 17 00:00:00 2001
From: Robert Griesemer <gri@golang.org>
Date: Tue, 11 Oct 2011 17:43:10 -0700
Subject: ebnf, ebnflint: move under exp

pkg/ebnf -> pkg/exp/ebnf
cmd/ebnflint -> pkg/exp/ebnflint

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5188042
---
 src/pkg/Makefile                 |   6 +-
 src/pkg/ebnf/Makefile            |  12 --
 src/pkg/ebnf/ebnf.go             | 269 ---------------------------------------
 src/pkg/ebnf/ebnf_test.go        |  71 -----------
 src/pkg/ebnf/parser.go           | 191 ---------------------------
 src/pkg/exp/ebnf/Makefile        |  12 ++
 src/pkg/exp/ebnf/ebnf.go         | 269 +++++++++++++++++++++++++++++++++++++++
 src/pkg/exp/ebnf/ebnf_test.go    |  71 +++++++++++
 src/pkg/exp/ebnf/parser.go       | 191 +++++++++++++++++++++++++++
 src/pkg/exp/ebnflint/Makefile    |  15 +++
 src/pkg/exp/ebnflint/doc.go      |  22 ++++
 src/pkg/exp/ebnflint/ebnflint.go | 109 ++++++++++++++++
 12 files changed, 692 insertions(+), 546 deletions(-)
 delete mode 100644 src/pkg/ebnf/Makefile
 delete mode 100644 src/pkg/ebnf/ebnf.go
 delete mode 100644 src/pkg/ebnf/ebnf_test.go
 delete mode 100644 src/pkg/ebnf/parser.go
 create mode 100644 src/pkg/exp/ebnf/Makefile
 create mode 100644 src/pkg/exp/ebnf/ebnf.go
 create mode 100644 src/pkg/exp/ebnf/ebnf_test.go
 create mode 100644 src/pkg/exp/ebnf/parser.go
 create mode 100644 src/pkg/exp/ebnflint/Makefile
 create mode 100644 src/pkg/exp/ebnflint/doc.go
 create mode 100644 src/pkg/exp/ebnflint/ebnflint.go

(limited to 'src/pkg')

diff --git a/src/pkg/Makefile b/src/pkg/Makefile
index e833fcfbba..321b463b13 100644
--- a/src/pkg/Makefile
+++ b/src/pkg/Makefile
@@ -68,7 +68,6 @@ DIRS=\
 	debug/elf\
 	debug/gosym\
 	debug/pe\
-	ebnf\
 	encoding/ascii85\
 	encoding/base32\
 	encoding/base64\
@@ -78,6 +77,8 @@ DIRS=\
 	encoding/pem\
 	exec\
 	exp/datafmt\
+	exp/ebnf\
+	exp/ebnflint\
 	exp/gui\
 	exp/gui/x11\
 	exp/norm\
@@ -173,7 +174,6 @@ DIRS=\
 	websocket\
 	xml\
 	../cmd/cgo\
-	../cmd/ebnflint\
 	../cmd/godoc\
 	../cmd/gofix\
 	../cmd/gofmt\
@@ -201,6 +201,7 @@ NOTEST+=\
 	crypto\
 	crypto/openpgp/error\
 	crypto/x509/pkix\
+	exp/ebnflint\
 	exp/gui\
 	exp/gui/x11\
 	exp/sql/driver\
@@ -220,7 +221,6 @@ NOTEST+=\
 	testing\
 	testing/iotest\
 	../cmd/cgo\
-	../cmd/ebnflint\
 	../cmd/godoc\
 	../cmd/gotest\
 	../cmd/goyacc\
diff --git a/src/pkg/ebnf/Makefile b/src/pkg/ebnf/Makefile
deleted file mode 100644
index f5555d2720..0000000000
--- a/src/pkg/ebnf/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright 2009 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-include ../../Make.inc
-
-TARG=ebnf
-GOFILES=\
-	ebnf.go\
-	parser.go\
-
-include ../../Make.pkg
diff --git a/src/pkg/ebnf/ebnf.go b/src/pkg/ebnf/ebnf.go
deleted file mode 100644
index 2ec7f00800..0000000000
--- a/src/pkg/ebnf/ebnf.go
+++ /dev/null
@@ -1,269 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
-// satisfying the following grammar (represented itself in EBNF):
-//
-//	Production  = name "=" [ Expression ] "." .
-//	Expression  = Alternative { "|" Alternative } .
-//	Alternative = Term { Term } .
-//	Term        = name | token [ "…" token ] | Group | Option | Repetition .
-//	Group       = "(" Expression ")" .
-//	Option      = "[" Expression "]" .
-//	Repetition  = "{" Expression "}" .
-//
-// A name is a Go identifier, a token is a Go string, and comments
-// and white space follow the same rules as for the Go language.
-// Production names starting with an uppercase Unicode letter denote
-// non-terminal productions (i.e., productions which allow white-space
-// and comments between tokens); all other production names denote
-// lexical productions.
-//
-package ebnf
-
-import (
-	"fmt"
-	"os"
-	"scanner"
-	"unicode"
-	"utf8"
-)
-
-// ----------------------------------------------------------------------------
-// Error handling
-
-type errorList []os.Error
-
-func (list errorList) Error() os.Error {
-	if len(list) == 0 {
-		return nil
-	}
-	return list
-}
-
-func (list errorList) String() string {
-	switch len(list) {
-	case 0:
-		return "no errors"
-	case 1:
-		return list[0].String()
-	}
-	return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
-}
-
-func newError(pos scanner.Position, msg string) os.Error {
-	return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
-}
-
-// ----------------------------------------------------------------------------
-// Internal representation
-
-type (
-	// An Expression node represents a production expression.
-	Expression interface {
-		// Pos is the position of the first character of the syntactic construct
-		Pos() scanner.Position
-	}
-
-	// An Alternative node represents a non-empty list of alternative expressions.
-	Alternative []Expression // x | y | z
-
-	// A Sequence node represents a non-empty list of sequential expressions.
-	Sequence []Expression // x y z
-
-	// A Name node represents a production name.
-	Name struct {
-		StringPos scanner.Position
-		String    string
-	}
-
-	// A Token node represents a literal.
-	Token struct {
-		StringPos scanner.Position
-		String    string
-	}
-
-	// A List node represents a range of characters.
-	Range struct {
-		Begin, End *Token // begin ... end
-	}
-
-	// A Group node represents a grouped expression.
-	Group struct {
-		Lparen scanner.Position
-		Body   Expression // (body)
-	}
-
-	// An Option node represents an optional expression.
-	Option struct {
-		Lbrack scanner.Position
-		Body   Expression // [body]
-	}
-
-	// A Repetition node represents a repeated expression.
-	Repetition struct {
-		Lbrace scanner.Position
-		Body   Expression // {body}
-	}
-
-	// A Production node represents an EBNF production.
-	Production struct {
-		Name *Name
-		Expr Expression
-	}
-
-	// A Bad node stands for pieces of source code that lead to a parse error.
-	Bad struct {
-		TokPos scanner.Position
-		Error  string // parser error message
-	}
-
-	// A Grammar is a set of EBNF productions. The map
-	// is indexed by production name.
-	//
-	Grammar map[string]*Production
-)
-
-func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
-func (x Sequence) Pos() scanner.Position    { return x[0].Pos() } // the parser always generates non-empty Sequences
-func (x *Name) Pos() scanner.Position       { return x.StringPos }
-func (x *Token) Pos() scanner.Position      { return x.StringPos }
-func (x *Range) Pos() scanner.Position      { return x.Begin.Pos() }
-func (x *Group) Pos() scanner.Position      { return x.Lparen }
-func (x *Option) Pos() scanner.Position     { return x.Lbrack }
-func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
-func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
-func (x *Bad) Pos() scanner.Position        { return x.TokPos }
-
-// ----------------------------------------------------------------------------
-// Grammar verification
-
-func isLexical(name string) bool {
-	ch, _ := utf8.DecodeRuneInString(name)
-	return !unicode.IsUpper(ch)
-}
-
-type verifier struct {
-	errors   errorList
-	worklist []*Production
-	reached  Grammar // set of productions reached from (and including) the root production
-	grammar  Grammar
-}
-
-func (v *verifier) error(pos scanner.Position, msg string) {
-	v.errors = append(v.errors, newError(pos, msg))
-}
-
-func (v *verifier) push(prod *Production) {
-	name := prod.Name.String
-	if _, found := v.reached[name]; !found {
-		v.worklist = append(v.worklist, prod)
-		v.reached[name] = prod
-	}
-}
-
-func (v *verifier) verifyChar(x *Token) int {
-	s := x.String
-	if utf8.RuneCountInString(s) != 1 {
-		v.error(x.Pos(), "single char expected, found "+s)
-		return 0
-	}
-	ch, _ := utf8.DecodeRuneInString(s)
-	return ch
-}
-
-func (v *verifier) verifyExpr(expr Expression, lexical bool) {
-	switch x := expr.(type) {
-	case nil:
-		// empty expression
-	case Alternative:
-		for _, e := range x {
-			v.verifyExpr(e, lexical)
-		}
-	case Sequence:
-		for _, e := range x {
-			v.verifyExpr(e, lexical)
-		}
-	case *Name:
-		// a production with this name must exist;
-		// add it to the worklist if not yet processed
-		if prod, found := v.grammar[x.String]; found {
-			v.push(prod)
-		} else {
-			v.error(x.Pos(), "missing production "+x.String)
-		}
-		// within a lexical production references
-		// to non-lexical productions are invalid
-		if lexical && !isLexical(x.String) {
-			v.error(x.Pos(), "reference to non-lexical production "+x.String)
-		}
-	case *Token:
-		// nothing to do for now
-	case *Range:
-		i := v.verifyChar(x.Begin)
-		j := v.verifyChar(x.End)
-		if i >= j {
-			v.error(x.Pos(), "decreasing character range")
-		}
-	case *Group:
-		v.verifyExpr(x.Body, lexical)
-	case *Option:
-		v.verifyExpr(x.Body, lexical)
-	case *Repetition:
-		v.verifyExpr(x.Body, lexical)
-	case *Bad:
-		v.error(x.Pos(), x.Error)
-	default:
-		panic(fmt.Sprintf("internal error: unexpected type %T", expr))
-	}
-}
-
-func (v *verifier) verify(grammar Grammar, start string) {
-	// find root production
-	root, found := grammar[start]
-	if !found {
-		var noPos scanner.Position
-		v.error(noPos, "no start production "+start)
-		return
-	}
-
-	// initialize verifier
-	v.worklist = v.worklist[0:0]
-	v.reached = make(Grammar)
-	v.grammar = grammar
-
-	// work through the worklist
-	v.push(root)
-	for {
-		n := len(v.worklist) - 1
-		if n < 0 {
-			break
-		}
-		prod := v.worklist[n]
-		v.worklist = v.worklist[0:n]
-		v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
-	}
-
-	// check if all productions were reached
-	if len(v.reached) < len(v.grammar) {
-		for name, prod := range v.grammar {
-			if _, found := v.reached[name]; !found {
-				v.error(prod.Pos(), name+" is unreachable")
-			}
-		}
-	}
-}
-
-// Verify checks that:
-//	- all productions used are defined
-//	- all productions defined are used when beginning at start
-//	- lexical productions refer only to other lexical productions
-//
-// Position information is interpreted relative to the file set fset.
-//
-func Verify(grammar Grammar, start string) os.Error {
-	var v verifier
-	v.verify(grammar, start)
-	return v.errors.Error()
-}
diff --git a/src/pkg/ebnf/ebnf_test.go b/src/pkg/ebnf/ebnf_test.go
deleted file mode 100644
index 8cfd6b9c37..0000000000
--- a/src/pkg/ebnf/ebnf_test.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ebnf
-
-import (
-	"bytes"
-	"testing"
-)
-
-var goodGrammars = []string{
-	`Program = .`,
-
-	`Program = foo .
-	 foo = "foo" .`,
-
-	`Program = "a" | "b" "c" .`,
-
-	`Program = "a" … "z" .`,
-
-	`Program = Song .
-	 Song = { Note } .
-	 Note = Do | (Re | Mi | Fa | So | La) | Ti .
-	 Do = "c" .
-	 Re = "d" .
-	 Mi = "e" .
-	 Fa = "f" .
-	 So = "g" .
-	 La = "a" .
-	 Ti = ti .
-	 ti = "b" .`,
-}
-
-var badGrammars = []string{
-	`Program = | .`,
-	`Program = | b .`,
-	`Program = a … b .`,
-	`Program = "a" … .`,
-	`Program = … "b" .`,
-	`Program = () .`,
-	`Program = [] .`,
-	`Program = {} .`,
-}
-
-func checkGood(t *testing.T, src string) {
-	grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
-	if err != nil {
-		t.Errorf("Parse(%s) failed: %v", src, err)
-		return
-	}
-	if err = Verify(grammar, "Program"); err != nil {
-		t.Errorf("Verify(%s) failed: %v", src, err)
-	}
-}
-
-func checkBad(t *testing.T, src string) {
-	_, err := Parse("", bytes.NewBuffer([]byte(src)))
-	if err == nil {
-		t.Errorf("Parse(%s) should have failed", src)
-	}
-}
-
-func TestGrammars(t *testing.T) {
-	for _, src := range goodGrammars {
-		checkGood(t, src)
-	}
-	for _, src := range badGrammars {
-		checkBad(t, src)
-	}
-}
diff --git a/src/pkg/ebnf/parser.go b/src/pkg/ebnf/parser.go
deleted file mode 100644
index 2dbbefb751..0000000000
--- a/src/pkg/ebnf/parser.go
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ebnf
-
-import (
-	"io"
-	"os"
-	"scanner"
-	"strconv"
-)
-
-type parser struct {
-	errors  errorList
-	scanner scanner.Scanner
-	pos     scanner.Position // token position
-	tok     int              // one token look-ahead
-	lit     string           // token literal
-}
-
-func (p *parser) next() {
-	p.tok = p.scanner.Scan()
-	p.pos = p.scanner.Position
-	p.lit = p.scanner.TokenText()
-}
-
-func (p *parser) error(pos scanner.Position, msg string) {
-	p.errors = append(p.errors, newError(pos, msg))
-}
-
-func (p *parser) errorExpected(pos scanner.Position, msg string) {
-	msg = `expected "` + msg + `"`
-	if pos.Offset == p.pos.Offset {
-		// the error happened at the current position;
-		// make the error message more specific
-		msg += ", found " + scanner.TokenString(p.tok)
-		if p.tok < 0 {
-			msg += " " + p.lit
-		}
-	}
-	p.error(pos, msg)
-}
-
-func (p *parser) expect(tok int) scanner.Position {
-	pos := p.pos
-	if p.tok != tok {
-		p.errorExpected(pos, scanner.TokenString(tok))
-	}
-	p.next() // make progress in any case
-	return pos
-}
-
-func (p *parser) parseIdentifier() *Name {
-	pos := p.pos
-	name := p.lit
-	p.expect(scanner.Ident)
-	return &Name{pos, name}
-}
-
-func (p *parser) parseToken() *Token {
-	pos := p.pos
-	value := ""
-	if p.tok == scanner.String {
-		value, _ = strconv.Unquote(p.lit)
-		// Unquote may fail with an error, but only if the scanner found
-		// an illegal string in the first place. In this case the error
-		// has already been reported.
-		p.next()
-	} else {
-		p.expect(scanner.String)
-	}
-	return &Token{pos, value}
-}
-
-// ParseTerm returns nil if no term was found.
-func (p *parser) parseTerm() (x Expression) {
-	pos := p.pos
-
-	switch p.tok {
-	case scanner.Ident:
-		x = p.parseIdentifier()
-
-	case scanner.String:
-		tok := p.parseToken()
-		x = tok
-		const ellipsis = '…' // U+2026, the horizontal ellipsis character
-		if p.tok == ellipsis {
-			p.next()
-			x = &Range{tok, p.parseToken()}
-		}
-
-	case '(':
-		p.next()
-		x = &Group{pos, p.parseExpression()}
-		p.expect(')')
-
-	case '[':
-		p.next()
-		x = &Option{pos, p.parseExpression()}
-		p.expect(']')
-
-	case '{':
-		p.next()
-		x = &Repetition{pos, p.parseExpression()}
-		p.expect('}')
-	}
-
-	return x
-}
-
-func (p *parser) parseSequence() Expression {
-	var list Sequence
-
-	for x := p.parseTerm(); x != nil; x = p.parseTerm() {
-		list = append(list, x)
-	}
-
-	// no need for a sequence if list.Len() < 2
-	switch len(list) {
-	case 0:
-		p.errorExpected(p.pos, "term")
-		return &Bad{p.pos, "term expected"}
-	case 1:
-		return list[0]
-	}
-
-	return list
-}
-
-func (p *parser) parseExpression() Expression {
-	var list Alternative
-
-	for {
-		list = append(list, p.parseSequence())
-		if p.tok != '|' {
-			break
-		}
-		p.next()
-	}
-	// len(list) > 0
-
-	// no need for an Alternative node if list.Len() < 2
-	if len(list) == 1 {
-		return list[0]
-	}
-
-	return list
-}
-
-func (p *parser) parseProduction() *Production {
-	name := p.parseIdentifier()
-	p.expect('=')
-	var expr Expression
-	if p.tok != '.' {
-		expr = p.parseExpression()
-	}
-	p.expect('.')
-	return &Production{name, expr}
-}
-
-func (p *parser) parse(filename string, src io.Reader) Grammar {
-	p.scanner.Init(src)
-	p.scanner.Filename = filename
-	p.next() // initializes pos, tok, lit
-
-	grammar := make(Grammar)
-	for p.tok != scanner.EOF {
-		prod := p.parseProduction()
-		name := prod.Name.String
-		if _, found := grammar[name]; !found {
-			grammar[name] = prod
-		} else {
-			p.error(prod.Pos(), name+" declared already")
-		}
-	}
-
-	return grammar
-}
-
-// Parse parses a set of EBNF productions from source src.
-// It returns a set of productions. Errors are reported
-// for incorrect syntax and if a production is declared
-// more than once; the filename is used only for error
-// positions.
-//
-func Parse(filename string, src io.Reader) (Grammar, os.Error) {
-	var p parser
-	grammar := p.parse(filename, src)
-	return grammar, p.errors.Error()
-}
diff --git a/src/pkg/exp/ebnf/Makefile b/src/pkg/exp/ebnf/Makefile
new file mode 100644
index 0000000000..844de675cb
--- /dev/null
+++ b/src/pkg/exp/ebnf/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=exp/ebnf
+GOFILES=\
+	ebnf.go\
+	parser.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/exp/ebnf/ebnf.go b/src/pkg/exp/ebnf/ebnf.go
new file mode 100644
index 0000000000..2ec7f00800
--- /dev/null
+++ b/src/pkg/exp/ebnf/ebnf.go
@@ -0,0 +1,269 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
+// satisfying the following grammar (represented itself in EBNF):
+//
+//	Production  = name "=" [ Expression ] "." .
+//	Expression  = Alternative { "|" Alternative } .
+//	Alternative = Term { Term } .
+//	Term        = name | token [ "…" token ] | Group | Option | Repetition .
+//	Group       = "(" Expression ")" .
+//	Option      = "[" Expression "]" .
+//	Repetition  = "{" Expression "}" .
+//
+// A name is a Go identifier, a token is a Go string, and comments
+// and white space follow the same rules as for the Go language.
+// Production names starting with an uppercase Unicode letter denote
+// non-terminal productions (i.e., productions which allow white-space
+// and comments between tokens); all other production names denote
+// lexical productions.
+//
+package ebnf
+
+import (
+	"fmt"
+	"os"
+	"scanner"
+	"unicode"
+	"utf8"
+)
+
+// ----------------------------------------------------------------------------
+// Error handling
+
+type errorList []os.Error
+
+func (list errorList) Error() os.Error {
+	if len(list) == 0 {
+		return nil
+	}
+	return list
+}
+
+func (list errorList) String() string {
+	switch len(list) {
+	case 0:
+		return "no errors"
+	case 1:
+		return list[0].String()
+	}
+	return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
+}
+
+func newError(pos scanner.Position, msg string) os.Error {
+	return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
+}
+
+// ----------------------------------------------------------------------------
+// Internal representation
+
+type (
+	// An Expression node represents a production expression.
+	Expression interface {
+		// Pos is the position of the first character of the syntactic construct
+		Pos() scanner.Position
+	}
+
+	// An Alternative node represents a non-empty list of alternative expressions.
+	Alternative []Expression // x | y | z
+
+	// A Sequence node represents a non-empty list of sequential expressions.
+	Sequence []Expression // x y z
+
+	// A Name node represents a production name.
+	Name struct {
+		StringPos scanner.Position
+		String    string
+	}
+
+	// A Token node represents a literal.
+	Token struct {
+		StringPos scanner.Position
+		String    string
+	}
+
+	// A List node represents a range of characters.
+	Range struct {
+		Begin, End *Token // begin ... end
+	}
+
+	// A Group node represents a grouped expression.
+	Group struct {
+		Lparen scanner.Position
+		Body   Expression // (body)
+	}
+
+	// An Option node represents an optional expression.
+	Option struct {
+		Lbrack scanner.Position
+		Body   Expression // [body]
+	}
+
+	// A Repetition node represents a repeated expression.
+	Repetition struct {
+		Lbrace scanner.Position
+		Body   Expression // {body}
+	}
+
+	// A Production node represents an EBNF production.
+	Production struct {
+		Name *Name
+		Expr Expression
+	}
+
+	// A Bad node stands for pieces of source code that lead to a parse error.
+	Bad struct {
+		TokPos scanner.Position
+		Error  string // parser error message
+	}
+
+	// A Grammar is a set of EBNF productions. The map
+	// is indexed by production name.
+	//
+	Grammar map[string]*Production
+)
+
+func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
+func (x Sequence) Pos() scanner.Position    { return x[0].Pos() } // the parser always generates non-empty Sequences
+func (x *Name) Pos() scanner.Position       { return x.StringPos }
+func (x *Token) Pos() scanner.Position      { return x.StringPos }
+func (x *Range) Pos() scanner.Position      { return x.Begin.Pos() }
+func (x *Group) Pos() scanner.Position      { return x.Lparen }
+func (x *Option) Pos() scanner.Position     { return x.Lbrack }
+func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
+func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
+func (x *Bad) Pos() scanner.Position        { return x.TokPos }
+
+// ----------------------------------------------------------------------------
+// Grammar verification
+
+func isLexical(name string) bool {
+	ch, _ := utf8.DecodeRuneInString(name)
+	return !unicode.IsUpper(ch)
+}
+
+type verifier struct {
+	errors   errorList
+	worklist []*Production
+	reached  Grammar // set of productions reached from (and including) the root production
+	grammar  Grammar
+}
+
+func (v *verifier) error(pos scanner.Position, msg string) {
+	v.errors = append(v.errors, newError(pos, msg))
+}
+
+func (v *verifier) push(prod *Production) {
+	name := prod.Name.String
+	if _, found := v.reached[name]; !found {
+		v.worklist = append(v.worklist, prod)
+		v.reached[name] = prod
+	}
+}
+
+func (v *verifier) verifyChar(x *Token) int {
+	s := x.String
+	if utf8.RuneCountInString(s) != 1 {
+		v.error(x.Pos(), "single char expected, found "+s)
+		return 0
+	}
+	ch, _ := utf8.DecodeRuneInString(s)
+	return ch
+}
+
+func (v *verifier) verifyExpr(expr Expression, lexical bool) {
+	switch x := expr.(type) {
+	case nil:
+		// empty expression
+	case Alternative:
+		for _, e := range x {
+			v.verifyExpr(e, lexical)
+		}
+	case Sequence:
+		for _, e := range x {
+			v.verifyExpr(e, lexical)
+		}
+	case *Name:
+		// a production with this name must exist;
+		// add it to the worklist if not yet processed
+		if prod, found := v.grammar[x.String]; found {
+			v.push(prod)
+		} else {
+			v.error(x.Pos(), "missing production "+x.String)
+		}
+		// within a lexical production references
+		// to non-lexical productions are invalid
+		if lexical && !isLexical(x.String) {
+			v.error(x.Pos(), "reference to non-lexical production "+x.String)
+		}
+	case *Token:
+		// nothing to do for now
+	case *Range:
+		i := v.verifyChar(x.Begin)
+		j := v.verifyChar(x.End)
+		if i >= j {
+			v.error(x.Pos(), "decreasing character range")
+		}
+	case *Group:
+		v.verifyExpr(x.Body, lexical)
+	case *Option:
+		v.verifyExpr(x.Body, lexical)
+	case *Repetition:
+		v.verifyExpr(x.Body, lexical)
+	case *Bad:
+		v.error(x.Pos(), x.Error)
+	default:
+		panic(fmt.Sprintf("internal error: unexpected type %T", expr))
+	}
+}
+
+func (v *verifier) verify(grammar Grammar, start string) {
+	// find root production
+	root, found := grammar[start]
+	if !found {
+		var noPos scanner.Position
+		v.error(noPos, "no start production "+start)
+		return
+	}
+
+	// initialize verifier
+	v.worklist = v.worklist[0:0]
+	v.reached = make(Grammar)
+	v.grammar = grammar
+
+	// work through the worklist
+	v.push(root)
+	for {
+		n := len(v.worklist) - 1
+		if n < 0 {
+			break
+		}
+		prod := v.worklist[n]
+		v.worklist = v.worklist[0:n]
+		v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
+	}
+
+	// check if all productions were reached
+	if len(v.reached) < len(v.grammar) {
+		for name, prod := range v.grammar {
+			if _, found := v.reached[name]; !found {
+				v.error(prod.Pos(), name+" is unreachable")
+			}
+		}
+	}
+}
+
+// Verify checks that:
+//	- all productions used are defined
+//	- all productions defined are used when beginning at start
+//	- lexical productions refer only to other lexical productions
+//
+// Position information is interpreted relative to the file set fset.
+//
+func Verify(grammar Grammar, start string) os.Error {
+	var v verifier
+	v.verify(grammar, start)
+	return v.errors.Error()
+}
diff --git a/src/pkg/exp/ebnf/ebnf_test.go b/src/pkg/exp/ebnf/ebnf_test.go
new file mode 100644
index 0000000000..8cfd6b9c37
--- /dev/null
+++ b/src/pkg/exp/ebnf/ebnf_test.go
@@ -0,0 +1,71 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ebnf
+
+import (
+	"bytes"
+	"testing"
+)
+
+var goodGrammars = []string{
+	`Program = .`,
+
+	`Program = foo .
+	 foo = "foo" .`,
+
+	`Program = "a" | "b" "c" .`,
+
+	`Program = "a" … "z" .`,
+
+	`Program = Song .
+	 Song = { Note } .
+	 Note = Do | (Re | Mi | Fa | So | La) | Ti .
+	 Do = "c" .
+	 Re = "d" .
+	 Mi = "e" .
+	 Fa = "f" .
+	 So = "g" .
+	 La = "a" .
+	 Ti = ti .
+	 ti = "b" .`,
+}
+
+var badGrammars = []string{
+	`Program = | .`,
+	`Program = | b .`,
+	`Program = a … b .`,
+	`Program = "a" … .`,
+	`Program = … "b" .`,
+	`Program = () .`,
+	`Program = [] .`,
+	`Program = {} .`,
+}
+
+func checkGood(t *testing.T, src string) {
+	grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
+	if err != nil {
+		t.Errorf("Parse(%s) failed: %v", src, err)
+		return
+	}
+	if err = Verify(grammar, "Program"); err != nil {
+		t.Errorf("Verify(%s) failed: %v", src, err)
+	}
+}
+
+func checkBad(t *testing.T, src string) {
+	_, err := Parse("", bytes.NewBuffer([]byte(src)))
+	if err == nil {
+		t.Errorf("Parse(%s) should have failed", src)
+	}
+}
+
+func TestGrammars(t *testing.T) {
+	for _, src := range goodGrammars {
+		checkGood(t, src)
+	}
+	for _, src := range badGrammars {
+		checkBad(t, src)
+	}
+}
diff --git a/src/pkg/exp/ebnf/parser.go b/src/pkg/exp/ebnf/parser.go
new file mode 100644
index 0000000000..2dbbefb751
--- /dev/null
+++ b/src/pkg/exp/ebnf/parser.go
@@ -0,0 +1,191 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ebnf
+
+import (
+	"io"
+	"os"
+	"scanner"
+	"strconv"
+)
+
+type parser struct {
+	errors  errorList
+	scanner scanner.Scanner
+	pos     scanner.Position // token position
+	tok     int              // one token look-ahead
+	lit     string           // token literal
+}
+
+func (p *parser) next() {
+	p.tok = p.scanner.Scan()
+	p.pos = p.scanner.Position
+	p.lit = p.scanner.TokenText()
+}
+
+func (p *parser) error(pos scanner.Position, msg string) {
+	p.errors = append(p.errors, newError(pos, msg))
+}
+
+func (p *parser) errorExpected(pos scanner.Position, msg string) {
+	msg = `expected "` + msg + `"`
+	if pos.Offset == p.pos.Offset {
+		// the error happened at the current position;
+		// make the error message more specific
+		msg += ", found " + scanner.TokenString(p.tok)
+		if p.tok < 0 {
+			msg += " " + p.lit
+		}
+	}
+	p.error(pos, msg)
+}
+
+func (p *parser) expect(tok int) scanner.Position {
+	pos := p.pos
+	if p.tok != tok {
+		p.errorExpected(pos, scanner.TokenString(tok))
+	}
+	p.next() // make progress in any case
+	return pos
+}
+
+func (p *parser) parseIdentifier() *Name {
+	pos := p.pos
+	name := p.lit
+	p.expect(scanner.Ident)
+	return &Name{pos, name}
+}
+
+func (p *parser) parseToken() *Token {
+	pos := p.pos
+	value := ""
+	if p.tok == scanner.String {
+		value, _ = strconv.Unquote(p.lit)
+		// Unquote may fail with an error, but only if the scanner found
+		// an illegal string in the first place. In this case the error
+		// has already been reported.
+		p.next()
+	} else {
+		p.expect(scanner.String)
+	}
+	return &Token{pos, value}
+}
+
+// ParseTerm returns nil if no term was found.
+func (p *parser) parseTerm() (x Expression) {
+	pos := p.pos
+
+	switch p.tok {
+	case scanner.Ident:
+		x = p.parseIdentifier()
+
+	case scanner.String:
+		tok := p.parseToken()
+		x = tok
+		const ellipsis = '…' // U+2026, the horizontal ellipsis character
+		if p.tok == ellipsis {
+			p.next()
+			x = &Range{tok, p.parseToken()}
+		}
+
+	case '(':
+		p.next()
+		x = &Group{pos, p.parseExpression()}
+		p.expect(')')
+
+	case '[':
+		p.next()
+		x = &Option{pos, p.parseExpression()}
+		p.expect(']')
+
+	case '{':
+		p.next()
+		x = &Repetition{pos, p.parseExpression()}
+		p.expect('}')
+	}
+
+	return x
+}
+
+func (p *parser) parseSequence() Expression {
+	var list Sequence
+
+	for x := p.parseTerm(); x != nil; x = p.parseTerm() {
+		list = append(list, x)
+	}
+
+	// no need for a sequence if list.Len() < 2
+	switch len(list) {
+	case 0:
+		p.errorExpected(p.pos, "term")
+		return &Bad{p.pos, "term expected"}
+	case 1:
+		return list[0]
+	}
+
+	return list
+}
+
+func (p *parser) parseExpression() Expression {
+	var list Alternative
+
+	for {
+		list = append(list, p.parseSequence())
+		if p.tok != '|' {
+			break
+		}
+		p.next()
+	}
+	// len(list) > 0
+
+	// no need for an Alternative node if list.Len() < 2
+	if len(list) == 1 {
+		return list[0]
+	}
+
+	return list
+}
+
+func (p *parser) parseProduction() *Production {
+	name := p.parseIdentifier()
+	p.expect('=')
+	var expr Expression
+	if p.tok != '.' {
+		expr = p.parseExpression()
+	}
+	p.expect('.')
+	return &Production{name, expr}
+}
+
+func (p *parser) parse(filename string, src io.Reader) Grammar {
+	p.scanner.Init(src)
+	p.scanner.Filename = filename
+	p.next() // initializes pos, tok, lit
+
+	grammar := make(Grammar)
+	for p.tok != scanner.EOF {
+		prod := p.parseProduction()
+		name := prod.Name.String
+		if _, found := grammar[name]; !found {
+			grammar[name] = prod
+		} else {
+			p.error(prod.Pos(), name+" declared already")
+		}
+	}
+
+	return grammar
+}
+
+// Parse parses a set of EBNF productions from source src.
+// It returns a set of productions. Errors are reported
+// for incorrect syntax and if a production is declared
+// more than once; the filename is used only for error
+// positions.
+//
+func Parse(filename string, src io.Reader) (Grammar, os.Error) {
+	var p parser
+	grammar := p.parse(filename, src)
+	return grammar, p.errors.Error()
+}
diff --git a/src/pkg/exp/ebnflint/Makefile b/src/pkg/exp/ebnflint/Makefile
new file mode 100644
index 0000000000..2057b07d58
--- /dev/null
+++ b/src/pkg/exp/ebnflint/Makefile
@@ -0,0 +1,15 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=ebnflint
+GOFILES=\
+	ebnflint.go\
+
+include ../../../Make.cmd
+
+test: $(TARG)
+	$(TARG) -start="SourceFile" "$(GOROOT)"/doc/go_spec.html
+
diff --git a/src/pkg/exp/ebnflint/doc.go b/src/pkg/exp/ebnflint/doc.go
new file mode 100644
index 0000000000..f35976eea7
--- /dev/null
+++ b/src/pkg/exp/ebnflint/doc.go
@@ -0,0 +1,22 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+
+Ebnflint verifies that EBNF productions are consistent and gramatically correct.
+It reads them from an HTML document such as the Go specification.
+
+Grammar productions are grouped in boxes demarcated by the HTML elements
+	<pre class="ebnf">
+	</pre>
+
+
+Usage:
+	ebnflint [--start production] [file]
+
+The --start flag specifies the name of the start production for
+the grammar; it defaults to "Start".
+
+*/
+package documentation
diff --git a/src/pkg/exp/ebnflint/ebnflint.go b/src/pkg/exp/ebnflint/ebnflint.go
new file mode 100644
index 0000000000..c827716c44
--- /dev/null
+++ b/src/pkg/exp/ebnflint/ebnflint.go
@@ -0,0 +1,109 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"exp/ebnf"
+	"flag"
+	"fmt"
+	"go/scanner"
+	"go/token"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+)
+
+var fset = token.NewFileSet()
+var start = flag.String("start", "Start", "name of start production")
+
+func usage() {
+	fmt.Fprintf(os.Stderr, "usage: ebnflint [flags] [filename]\n")
+	flag.PrintDefaults()
+	os.Exit(1)
+}
+
+// Markers around EBNF sections in .html files
+var (
+	open  = []byte(`<pre class="ebnf">`)
+	close = []byte(`</pre>`)
+)
+
+func report(err os.Error) {
+	scanner.PrintError(os.Stderr, err)
+	os.Exit(1)
+}
+
+func extractEBNF(src []byte) []byte {
+	var buf bytes.Buffer
+
+	for {
+		// i = beginning of EBNF text
+		i := bytes.Index(src, open)
+		if i < 0 {
+			break // no EBNF found - we are done
+		}
+		i += len(open)
+
+		// write as many newlines as found in the excluded text
+		// to maintain correct line numbers in error messages
+		for _, ch := range src[0:i] {
+			if ch == '\n' {
+				buf.WriteByte('\n')
+			}
+		}
+
+		// j = end of EBNF text (or end of source)
+		j := bytes.Index(src[i:], close) // close marker
+		if j < 0 {
+			j = len(src) - i
+		}
+		j += i
+
+		// copy EBNF text
+		buf.Write(src[i:j])
+
+		// advance
+		src = src[j:]
+	}
+
+	return buf.Bytes()
+}
+
+func main() {
+	flag.Parse()
+
+	var (
+		filename string
+		src      []byte
+		err      os.Error
+	)
+	switch flag.NArg() {
+	case 0:
+		filename = "<stdin>"
+		src, err = ioutil.ReadAll(os.Stdin)
+	case 1:
+		filename = flag.Arg(0)
+		src, err = ioutil.ReadFile(filename)
+	default:
+		usage()
+	}
+	if err != nil {
+		report(err)
+	}
+
+	if filepath.Ext(filename) == ".html" || bytes.Index(src, open) >= 0 {
+		src = extractEBNF(src)
+	}
+
+	grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src))
+	if err != nil {
+		report(err)
+	}
+
+	if err = ebnf.Verify(grammar, *start); err != nil {
+		report(err)
+	}
+}
-- 
cgit v1.3-5-g9baa