cmd/compile: better syntax error handling for new parser
- better error messages
- better error recovery by advancing to "follow" token after error
- make sure that we make progress after all errors
- minor cleanups
Change-Id: Ie43b8b02799618d70dc8fc227fab3e4e9e0d8e3a
Reviewed-on: https://go-review.googlesource.com/16892
Run-TryBot: Robert Griesemer <gri@golang.org>
Reviewed-by: Chris Manghane <cmang@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 29747d5..4446549 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -484,8 +484,7 @@
var lexbuf bytes.Buffer
var strbuf bytes.Buffer
-
-var litbuf string
+var litbuf string // LLITERAL value for use in syntax error messages
var Debug [256]int
diff --git a/src/cmd/compile/internal/gc/parser.go b/src/cmd/compile/internal/gc/parser.go
index 4f0a6ea..40fe1e1 100644
--- a/src/cmd/compile/internal/gc/parser.go
+++ b/src/cmd/compile/internal/gc/parser.go
@@ -10,7 +10,7 @@
"strings"
)
-const trace = true // if set, parse tracing can be enabled with -x
+const trace = false // if set, parse tracing can be enabled with -x
// TODO(gri) Once we handle imports w/o redirecting the underlying
// source of the lexer we can get rid of these. They are here for
@@ -31,22 +31,6 @@
}
func parse_file() {
- // This doesn't quite work w/ the trybots. Fun experiment but we need to find a better way.
- // go func() {
- // prev := lexlineno
- // for {
- // time.Sleep(5 * time.Second)
- // t := lexlineno // racy but we don't care - any new value will do
- // if prev == t {
- // // If lexlineno doesn't change anymore we probably have an endless
- // // loop somewhere. Terminate before process becomes unresponsive.
- // Yyerror("internal error: compiler makes no progress (workaround: -oldparser)")
- // errorexit()
- // }
- // prev = t
- // }
- // }()
-
thenewparser = parser{}
thenewparser.loadsys()
thenewparser.next()
@@ -105,38 +89,70 @@
func (p *parser) want(tok int32) {
if p.tok != EOF && !p.got(tok) {
- p.error("")
+ p.syntax_error("")
+ p.advance()
}
}
// ----------------------------------------------------------------------------
// Syntax error handling
-// TODO(gri) Approach this more systematically. For now it passes all tests.
+func (p *parser) syntax_error(msg string) {
+ if p.tok == EOF && nerrors > 0 {
+ return // avoid meaningless follow-up errors
+ }
-func syntax_error(msg string) {
- Yyerror("syntax error: " + msg)
-}
-
-func (p *parser) error(context string) {
- if p.tok == EOF {
+ // add punctuation etc. as needed to msg
+ switch {
+ case msg == "":
+ // nothing to do
+ case strings.HasPrefix(msg, "in"), strings.HasPrefix(msg, "at"), strings.HasPrefix(msg, "after"):
+ msg = " " + msg
+ case strings.HasPrefix(msg, "expecting"):
+ msg = ", " + msg
+ default:
+ // plain error - we don't care about current token
+ Yyerror("syntax error: " + msg)
return
}
- syntax_error("unexpected " + tokstring(p.tok) + context)
- // TODO(gri) keep also track of nesting below
+
+ // determine token string
+ var tok string
switch p.tok {
- case '(':
- // skip to closing ')'
- for p.tok != EOF && p.tok != ')' {
- p.next()
+ case LLITERAL:
+ // this is also done in Yyerror but it's cleaner to do it here
+ tok = litbuf
+ case LNAME:
+ if p.sym_ != nil && p.sym_.Name != "" {
+ tok = p.sym_.Name
+ } else {
+ tok = "name"
}
- case '{':
- // skip to closing '}'
- for p.tok != EOF && p.tok != '}' {
- p.next()
- }
+ case LASOP:
+ tok = goopnames[p.op] + "="
+ default:
+ tok = tokstring(p.tok)
}
- p.next() // make progress
+
+ Yyerror("syntax error: unexpected " + tok + msg)
+}
+
+// Advance consumes tokens until it finds one in the stoplist.
+// If the stoplist is empty, the next token is consumed.
+func (p *parser) advance(stoplist ...int32) {
+ if len(stoplist) == 0 {
+ p.next()
+ return
+ }
+
+ for p.tok != EOF {
+ for _, stop := range stoplist {
+ if p.tok == stop {
+ return
+ }
+ }
+ p.next()
+ }
}
func tokstring(tok int32) string {
@@ -186,7 +202,7 @@
LIMPORT: "import",
LINTERFACE: "interface",
LMAP: "map",
- LNAME: "<name>",
+ LNAME: "LNAME",
LPACKAGE: "package",
LRANGE: "range",
LRETURN: "return",
@@ -279,17 +295,12 @@
mkpackage(p.sym().Name)
p.want(';')
} else {
- prevlineno = lineno // TODO(gri) do we still need this? (e.g., not needed for test/fixedbugs/bug050.go)
- Yyerror("package statement must be first")
+ p.syntax_error("package statement must be first")
errorexit()
}
}
-// import:
-// LIMPORT import_stmt
-// | LIMPORT '(' import_stmt_list osemi ')'
-// | LIMPORT '(' ')'
-
+// go.y:import
func (p *parser) import_() {
if trace && Debug['x'] != 0 {
defer p.trace("import_")()
@@ -388,7 +399,8 @@
path = p.val
p.next()
} else {
- syntax_error("missing import path; require quoted string")
+ p.syntax_error("missing import path; require quoted string")
+ p.advance(';', ')')
}
line := parserline() // TODO(gri) check correct placement of this
@@ -550,7 +562,8 @@
if p.tok != ';' {
typ = p.ntype()
} else {
- p.error(" in type declaration")
+ p.syntax_error("in type declaration")
+ p.advance(';', ')')
}
return list1(typedcl1(name, typ, true))
@@ -612,7 +625,8 @@
if lhs.Op == ONAME || lhs.Op == ONONAME || lhs.Op == OTYPE {
lhs = newname(lhs.Sym)
} else {
- p.error(", expecting semicolon or newline or }")
+ p.syntax_error("expecting semicolon or newline or }")
+ // we already progressed, no need to advance
}
lhs := Nod(OLABEL, lhs, nil)
lhs.Sym = dclstack // context, for goto restrictions
@@ -677,7 +691,7 @@
rhs := p.expr_list()
if rhs.N.Op == OTYPESW {
- ss := Nod(OTYPESW, nil, rhs.N.Right)
+ ts := Nod(OTYPESW, nil, rhs.N.Right)
if rhs.Next != nil {
Yyerror("expr.(type) must be alone in list")
}
@@ -686,14 +700,15 @@
} else if (lhs.N.Op != ONAME && lhs.N.Op != OTYPE && lhs.N.Op != ONONAME && (lhs.N.Op != OLITERAL || lhs.N.Name == nil)) || isblank(lhs.N) {
Yyerror("invalid variable name %s in type switch", lhs.N)
} else {
- ss.Left = dclname(lhs.N.Sym)
- } // it's a colas, so must not re-use an oldname.
- return ss
+ ts.Left = dclname(lhs.N.Sym)
+ } // it's a colas, so must not re-use an oldname
+ return ts
}
return colas(lhs, rhs, int32(line))
default:
- p.error(", expecting := or = or comma")
+ p.syntax_error("expecting := or = or comma")
+ p.advance(';', '}')
return nil
}
}
@@ -711,7 +726,8 @@
// report error at line of ':' token
saved := lexlineno
lexlineno = prevlineno
- syntax_error("missing statement after label")
+ p.syntax_error("missing statement after label")
+ // we are already at the end of the labeled statement - no need to advance
lexlineno = saved
return missing_stmt
}
@@ -799,7 +815,8 @@
return stmt
default:
- p.error(", expecting := or = or : or comma")
+ p.syntax_error("expecting := or = or : or comma")
+ p.advance(LCASE, LDEFAULT, '}')
return nil
}
@@ -825,7 +842,8 @@
return stmt
default:
- p.error(", expecting case or default or }")
+ p.syntax_error("expecting case or default or }")
+ p.advance(LCASE, LDEFAULT, '}')
return nil
}
}
@@ -840,12 +858,8 @@
markdcl()
p.next() // consume ';' after markdcl() for correct lineno
} else if else_clause {
- syntax_error("else must be followed by if or statement block")
- // skip through closing }
- for p.tok != EOF && p.tok != '}' {
- p.next()
- }
- p.next()
+ p.syntax_error("else must be followed by if or statement block")
+ p.advance('}')
return nil
} else {
panic("unreachable")
@@ -911,12 +925,8 @@
}
if !p.got('{') {
- syntax_error("missing { after switch clause")
- // skip through closing }
- for p.tok != EOF && p.tok != '}' {
- p.next()
- }
- p.next()
+ p.syntax_error("missing { after switch clause")
+ p.advance('}')
return nil
}
@@ -937,12 +947,8 @@
markdcl()
p.next() // consume ';' after markdcl() for correct lineno
} else {
- syntax_error("missing { after " + context)
- // skip through closing }
- for p.tok != EOF && p.tok != '}' {
- p.next()
- }
- p.next()
+ p.syntax_error("missing { after " + context)
+ p.advance('}')
return nil
}
@@ -1386,12 +1392,13 @@
case '{':
// common case: p.header is missing simple_stmt before { in if, for, switch
- syntax_error("missing operand")
+ p.syntax_error("missing operand")
// '{' will be consumed in pexpr - no need to consume it here
return nil
default:
- p.error(" in operand")
+ p.syntax_error("in operand")
+ p.advance(';', '}')
return nil
}
}
@@ -1439,7 +1446,8 @@
}
default:
- p.error(", expecting name or (")
+ p.syntax_error("expecting name or (")
+ p.advance(';', '}')
}
case '[':
@@ -1522,7 +1530,8 @@
break loop
}
if t != x {
- syntax_error("cannot parenthesize type in composite literal")
+ p.syntax_error("cannot parenthesize type in composite literal")
+ // already progressed, no need to advance
}
n := p.complitexpr()
n.Right = x
@@ -1650,7 +1659,8 @@
return nil
default:
- p.error("")
+ p.syntax_error("")
+ p.advance()
return new(Sym)
}
}
@@ -1719,11 +1729,13 @@
case LDDD:
// permit ...T but complain
// TODO(gri) introduced for test/fixedbugs/bug228.go - maybe adjust bug or find better solution
- p.error(" in type")
+ p.syntax_error("")
+ p.advance()
return p.ntype()
default:
- p.error(" in type")
+ p.syntax_error("")
+ p.advance()
return nil
}
}
@@ -1742,7 +1754,8 @@
LDDD:
return p.ntype()
default:
- syntax_error("missing channel element type")
+ p.syntax_error("missing channel element type")
+ // assume element type is simply absent - don't advance
return nil
}
}
@@ -2035,7 +2048,8 @@
return f
default:
- p.error(", expecting name or (")
+ p.syntax_error("expecting name or (")
+ p.advance('{', ';')
return nil
}
}
@@ -2176,20 +2190,12 @@
default:
if p.tok == '{' && l != nil && l.End.N.Op == ODCLFUNC && l.End.N.Nbody == nil {
// opening { of function declaration on next line
- syntax_error("unexpected semicolon or newline before {")
+ p.syntax_error("unexpected semicolon or newline before {")
} else {
- syntax_error("non-declaration statement outside function body")
+ p.syntax_error("non-declaration statement outside function body")
}
- // skip over tokens until we find a new top-level declaration
- // TODO(gri) keep track of {} nesting as well?
- for {
- p.next()
- switch p.tok {
- case LVAR, LCONST, LTYPE, LFUNC, EOF:
- continue loop
- }
- }
-
+ p.advance(LVAR, LCONST, LTYPE, LFUNC)
+ goto loop
}
if nsyntaxerrors == 0 {
@@ -2209,15 +2215,9 @@
// it may read the subsequent comment line which may
// set the flags for the next function declaration.
if p.tok != EOF && !p.got(';') {
- p.error(" after top level declaration")
- // TODO(gri) same code above - factor!
- for {
- p.next()
- switch p.tok {
- case LVAR, LCONST, LTYPE, LFUNC, EOF:
- continue loop
- }
- }
+ p.syntax_error("after top level declaration")
+ p.advance(LVAR, LCONST, LTYPE, LFUNC)
+ goto loop
}
}
return
@@ -2329,7 +2329,8 @@
}
default:
- p.error(", expecting field name or embedded type")
+ p.syntax_error("expecting field name or embedded type")
+ p.advance(';', '}')
return nil
}
}
@@ -2355,7 +2356,8 @@
name = p.sym_
p.next()
} else {
- p.error(", expecting name")
+ p.syntax_error("expecting name")
+ p.advance('.', ';', '}')
name = new(Sym)
}
@@ -2409,7 +2411,8 @@
hasNameList = true
}
if hasNameList {
- syntax_error("name list not allowed in interface type")
+ p.syntax_error("name list not allowed in interface type")
+ // already progressed, no need to advance
}
if p.tok != '(' {
@@ -2435,7 +2438,8 @@
return n
default:
- p.error("")
+ p.syntax_error("")
+ p.advance(';', '}')
return nil
}
}
@@ -2509,7 +2513,8 @@
return p.ntype()
default:
- p.error(", expecting )")
+ p.syntax_error("expecting )")
+ p.advance(',', ')')
return nil
}
}
@@ -2677,7 +2682,8 @@
continue
}
if !p.got(';') {
- p.error(" at end of statement")
+ p.syntax_error("at end of statement")
+ p.advance(';', '}')
}
}
return
@@ -2760,8 +2766,8 @@
// ',' is optional before a closing ')' or '}'
return
case ';':
- syntax_error("need trailing comma before newline in " + context)
- p.next()
+ p.syntax_error("need trailing comma before newline in " + context)
+ p.next() // interpret ';' as comma
return
}
p.want(',')
@@ -2771,7 +2777,8 @@
// Importing packages
func (p *parser) import_error() {
- p.error(" in export data of imported package")
+ p.syntax_error("in export data of imported package")
+ p.next()
}
// The methods below reflect a 1:1 translation of the corresponding go.y yacc