// usr/gri/gosrc/scanner.go - go - Git at Google

 // Copyright 2009 The Go Authors.  All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package Scanner

 import Platform "platform"
 import Utils "utils"


 // Token codes produced by the scanner. The values are consecutive
 // integers assigned via iota, so the order of the entries below is
 // significant: init() walks the range KEYWORDS_BEG..KEYWORDS_END to
 // build the keyword table.
 export const (
 	// special tokens and literals
 	ILLEGAL = iota;
 	EOF;
 	INT;
 	FLOAT;
 	STRING;

 	// punctuation
 	COMMA;
 	COLON;
 	SEMICOLON;
 	PERIOD;

 	// brackets
 	LPAREN;
 	RPAREN;
 	LBRACK;
 	RBRACK;
 	LBRACE;
 	RBRACE;

 	// "=" and ":="
 	ASSIGN;
 	DEFINE;

 	// "++", "--" and "!"
 	INC;
 	DEC;
 	NOT;

 	// bitwise operators
 	AND;
 	OR;
 	XOR;

 	// arithmetic operators
 	ADD;
 	SUB;
 	MUL;
 	QUO;
 	REM;

 	// comparison operators
 	EQL;
 	NEQ;
 	LSS;
 	LEQ;
 	GTR;
 	GEQ;

 	// shift operators
 	SHL;
 	SHR;

 	// "<-"
 	ARROW;

 	// arithmetic assignment operators
 	ADD_ASSIGN;
 	SUB_ASSIGN;
 	MUL_ASSIGN;
 	QUO_ASSIGN;
 	REM_ASSIGN;

 	// bitwise assignment operators
 	AND_ASSIGN;
 	OR_ASSIGN;
 	XOR_ASSIGN;

 	// shift assignment operators
 	SHL_ASSIGN;
 	SHR_ASSIGN;

 	// "&&" and "||"
 	LAND;
 	LOR;

 	// IDENT must be immediately before keywords
 	IDENT;

 	// keywords
 	// KEYWORDS_BEG and KEYWORDS_END only delimit the keyword range;
 	// TokenName has no spelling for either of them.
 	KEYWORDS_BEG;
 	BREAK;
 	CASE;
 	CHAN;
 	CONST;
 	CONTINUE;
 	DEFAULT;
 	ELSE;
 	EXPORT;
 	FALLTHROUGH;
 	FALSE;
 	FOR;
 	FUNC;
 	GO;
 	GOTO;
 	IF;
 	IMPORT;
 	INTERFACE;
 	IOTA;
 	MAP;
 	NEW;
 	NIL;
 	PACKAGE;
 	RANGE;
 	RETURN;
 	SELECT;
 	STRUCT;
 	SWITCH;
 	TRUE;
 	TYPE;
 	VAR;
 	KEYWORDS_END;
 )


 // Keywords maps a keyword's spelling to its token code. It is filled
 // in by init() and consulted by ScanIdentifier.
 var Keywords *map [string] int;
 // VerboseMsgs makes Error print the column in addition to the line.
 var VerboseMsgs bool;  // error message customization


 // TokenName returns a printable representation of the token code tok:
 // a descriptive word for literals and special tokens, the source
 // spelling for operators and keywords, and "???" for any value that
 // has no entry (this includes KEYWORDS_BEG and KEYWORDS_END).
 //
 // NOTE(review): LBRACE and RBRACE yield their constant names rather
 // than "{" and "}"; left unchanged since this may be deliberate.
 export func TokenName(tok int) string {
 	switch (tok) {
 	case ILLEGAL: return "illegal";
 	case EOF: return "eof";
 	case INT: return "int";
 	case FLOAT: return "float";
 	case STRING: return "string";

 	case COMMA: return ",";
 	case COLON: return ":";
 	case SEMICOLON: return ";";
 	case PERIOD: return ".";

 	case LPAREN: return "(";
 	case RPAREN: return ")";
 	case LBRACK: return "[";
 	case RBRACK: return "]";
 	case LBRACE: return "LBRACE";
 	case RBRACE: return "RBRACE";

 	case ASSIGN: return "=";
 	case DEFINE: return ":=";

 	case INC: return "++";
 	case DEC: return "--";
 	case NOT: return "!";

 	case AND: return "&";
 	case OR: return "|";
 	case XOR: return "^";

 	case ADD: return "+";
 	case SUB: return "-";
 	case MUL: return "*";
 	case QUO: return "/";
 	case REM: return "%";

 	case EQL: return "==";
 	case NEQ: return "!=";
 	case LSS: return "<";
 	case LEQ: return "<=";
 	case GTR: return ">";
 	case GEQ: return ">=";

 	case SHL: return "<<";
 	case SHR: return ">>";

 	case ARROW: return "<-";

 	case ADD_ASSIGN: return "+=";
 	case SUB_ASSIGN: return "-=";
 	case MUL_ASSIGN: return "*=";  // was "+=", a copy of ADD_ASSIGN
 	case QUO_ASSIGN: return "/=";
 	case REM_ASSIGN: return "%=";

 	case AND_ASSIGN: return "&=";
 	case OR_ASSIGN: return "|=";
 	case XOR_ASSIGN: return "^=";

 	case SHL_ASSIGN: return "<<=";
 	case SHR_ASSIGN: return ">>=";

 	case LAND: return "&&";
 	case LOR: return "||";

 	case IDENT: return "ident";

 	case BREAK: return "break";
 	case CASE: return "case";
 	case CHAN: return "chan";
 	case CONST: return "const";
 	case CONTINUE: return "continue";
 	case DEFAULT: return "default";
 	case ELSE: return "else";
 	case EXPORT: return "export";
 	case FALLTHROUGH: return "fallthrough";
 	case FALSE: return "false";
 	case FOR: return "for";
 	case FUNC: return "func";
 	case GO: return "go";
 	case GOTO: return "goto";
 	case IF: return "if";
 	case IMPORT: return "import";
 	case INTERFACE: return "interface";
 	case IOTA: return "iota";
 	case MAP: return "map";
 	case NEW: return "new";
 	case NIL: return "nil";
 	case PACKAGE: return "package";
 	case RANGE: return "range";
 	case RETURN: return "return";
 	case SELECT: return "select";
 	case STRUCT: return "struct";
 	case SWITCH: return "switch";
 	case TRUE: return "true";
 	case TYPE: return "type";
 	case VAR: return "var";
 	}

 	return "???";
 }


 // init builds the keyword lookup table and selects the error message
 // format.
 func init() {
 	Keywords = new(map [string] int);

 	// KEYWORDS_BEG and KEYWORDS_END are range markers without a
 	// spelling (TokenName yields "???" for both), so only the codes
 	// strictly between them are entered into the table.
 	for i := KEYWORDS_BEG + 1; i < KEYWORDS_END; i++ {
 		Keywords[TokenName(i)] = i;
 	}

 	// Provide column information in error messages for gri only...
 	VerboseMsgs = Platform.USER == "gri";
 }


 // is_whitespace reports whether ch is a blank, a horizontal tab, a
 // newline or a carriage return.
 func is_whitespace(ch int) bool {
 	switch ch {
 	case ' ', '\t', '\n', '\r':
 		return true;
 	}
 	return false;
 }


 // is_letter reports whether ch may appear in an identifier as a
 // letter: an ASCII letter, an underscore, or any value >= 128.
 func is_letter(ch int) bool {
 	switch {
 	case ch == '_':
 		return true;
 	case ch >= 128:
 		// every non-ASCII value is accepted without further checks
 		return true;
 	case 'a' <= ch && ch <= 'z':
 		return true;
 	case 'A' <= ch && ch <= 'Z':
 		return true;
 	}
 	return false;
 }


 // digit_val returns the numeric value of ch read as a hexadecimal
 // digit (0..15). Any other character yields 16, so that the test
 // digit_val(ch) < base fails for every base up to 16.
 func digit_val(ch int) int {
 	switch {
 	case '0' <= ch && ch <= '9':
 		return ch - '0';
 	case 'a' <= ch && ch <= 'f':
 		return 10 + (ch - 'a');
 	case 'A' <= ch && ch <= 'F':
 		return 10 + (ch - 'A');
 	}
 	return 16;  // not a digit: larger than any legal digit val
 }


 // Scanner holds the state needed to tokenize one source text: the
 // error bookkeeping used by Error and the read position with a one
 // character look-ahead maintained by Next. Open initializes all
 // fields.
 export type Scanner struct {
 	filename string;  // error reporting only
 	nerrors int;  // number of errors
 	errpos int;  // last error position
 
 	src string;  // scanned source
 	pos int;  // current reading position
 	ch int;  // one char look-ahead
 	chpos int;  // position of ch
 }


 // Read the next Unicode char into S.ch.
 // S.ch < 0 means end-of-file.
 //
 // The character is decoded from the UTF-8 bytes of S.src starting at
 // S.pos; sequences of one, two and three bytes are recognized. On
 // success S.chpos is set to the byte offset at which the character
 // starts and S.pos to the offset just past it. At end of input S.ch
 // is -1, S.chpos is len(S.src) and S.pos is left unchanged. Any other
 // input (truncated or malformed sequence, overlong encoding, lead
 // byte of a longer sequence) yields Bad (0xFFFD) and advances S.pos
 // by a single byte.
 //
 func (S *Scanner) Next() {
 	const (
 		// number of payload bits in the lead byte of a 1..4 byte
 		// sequence (Bit1..Bit4) and in a continuation byte (Bitx)
 		Bit1 = 7;
 		Bitx = 6;
 		Bit2 = 5;
 		Bit3 = 4;
 		Bit4 = 3;

 		// tag bits: smallest byte value of each byte class
 		T1 = (1 << (Bit1 + 1) - 1) ^ 0xFF;  // 0000 0000
 		Tx = (1 << (Bitx + 1) - 1) ^ 0xFF;  // 1000 0000
 		T2 = (1 << (Bit2 + 1) - 1) ^ 0xFF;  // 1100 0000
 		T3 = (1 << (Bit3 + 1) - 1) ^ 0xFF;  // 1110 0000
 		T4 = (1 << (Bit4 + 1) - 1) ^ 0xFF;  // 1111 0000

 		// largest value representable in 1, 2 and 3 bytes
 		Rune1 = 1 << (Bit1 + 0*Bitx) - 1;  // 0000 0000 0111 1111
 		Rune2 = 1 << (Bit2 + 1*Bitx) - 1;  // 0000 0111 1111 1111
 		Rune3 = 1 << (Bit3 + 2*Bitx) - 1;  // 1111 1111 1111 1111

 		Maskx = 0x3F;  // 1 << Bitx - 1;  // 0011 1111
 		Testx = 0xC0;  // Maskx ^ 0xFF;  // 1100 0000

 		Bad	= 0xFFFD;  // Runeerror
 	);

 	// pos is a local cursor; S.pos is only updated on success
 	src := S.src;
 	lim := len(src);
 	pos := S.pos;

 	// 1-byte sequence
 	// 0000-007F => T1
 	if pos >= lim {
 		S.ch = -1;  // end of file
 		S.chpos = lim;
 		return;
 	}
 	c0 := int(src[pos]);
 	pos++;
 	if c0 < Tx {
 		S.ch = c0;
 		S.chpos = S.pos;
 		S.pos = pos;
 		return;
 	}

 	// 2-byte sequence
 	// 0080-07FF => T2 Tx
 	if pos >= lim {
 		goto bad;
 	}
 	// xor with Tx clears the continuation tag; a valid continuation
 	// byte then has both top bits zero
 	c1 := int(src[pos]) ^ Tx;
 	pos++;
 	if c1 & Testx != 0 {
 		goto bad;
 	}
 	if c0 < T3 {
 		if c0 < T2 {
 			// a continuation byte cannot start a sequence
 			goto bad;
 		}
 		r := (c0 << Bitx | c1) & Rune2;
 		if  r <= Rune1 {
 			// value would have fit into one byte
 			goto bad;
 		}
 		S.ch = r;
 		S.chpos = S.pos;
 		S.pos = pos;
 		return;
 	}

 	// 3-byte sequence
 	// 0800-FFFF => T3 Tx Tx
 	if pos >= lim {
 		goto bad;
 	}
 	c2 := int(src[pos]) ^ Tx;
 	pos++;
 	if c2 & Testx != 0 {
 		goto bad;
 	}
 	if c0 < T4 {
 		r := (((c0 << Bitx | c1) << Bitx) | c2) & Rune3;
 		if r <= Rune2 {
 			// value would have fit into two bytes
 			goto bad;
 		}
 		S.ch = r;
 		S.chpos = S.pos;
 		S.pos = pos;
 		return;
 	}

 	// bad encoding
 	// (also reached for lead bytes >= T4: longer sequences are not decoded)
 bad:
 	S.ch = Bad;
 	S.chpos = S.pos;
 	S.pos += 1;
 	return;
 }


 // Compute (line, column) information for a given source position.
 //
 // pos is a byte offset into S.src; values beyond the end of the
 // source are clamped to len(S.src). Lines and columns are both
 // 1-based, and the column is counted in bytes from the start of the
 // line. (Previously the first line was reported with 0-based columns
 // while all following lines used 1-based columns.)
 func (S *Scanner) LineCol(pos int) (line, col int) {
 	line = 1;
 	// offset of the newline that precedes the current line; -1 acts
 	// as a virtual newline in front of the first line
 	lpos := -1;

 	src := S.src;
 	if pos > len(src) {
 		pos = len(src);
 	}

 	for i := 0; i < pos; i++ {
 		if src[i] == '\n' {
 			line++;
 			lpos = i;
 		}
 	}

 	return line, pos - lpos;
 }


 // Error reports msg for source position pos on the standard print
 // stream, prefixed by the file name and, for pos >= 0, the line (plus
 // the column if VerboseMsgs is set). Apart from the very first error,
 // a message is suppressed when it lies within errdist positions of
 // the last reported one. The program exits with status 1 once 10
 // errors have been reported.
 func (S *Scanner) Error(pos int, msg string) {
 	const errdist = 10;

 	// distance to the previously reported error
 	dist := S.errpos - pos;
 	if dist < 0 {
 		dist = -dist;
 	}

 	first := S.nerrors == 0;  // always report first error
 	if first || dist > errdist {
 		print(S.filename);
 		if pos >= 0 {
 			// print position
 			line, col := S.LineCol(pos);
 			if !VerboseMsgs {
 				print(":", line);
 			} else {
 				print(":", line, ":", col);
 			}
 		}
 		print(": ", msg, "\n");
 		S.errpos = pos;
 		S.nerrors++;
 	}

 	if S.nerrors >= 10 {
 		sys.exit(1);
 	}
 }


 // Open prepares S for scanning src. filename is only used in error
 // messages. The error state is cleared and the first character is
 // read so that S.ch holds valid look-ahead.
 func (S *Scanner) Open(filename, src string) {
 	// source text and reading position
 	S.src, S.pos = src, 0;

 	// error bookkeeping
 	S.filename = filename;
 	S.nerrors, S.errpos = 0, 0;

 	// prime the one char look-ahead
 	S.Next();
 }


 // CharString formats ch for use in error messages: the character in
 // single quotes (control characters, backslash and single quote are
 // shown as escape sequences) followed by its value in base 16, as in
 // 'x' (U+78).
 func CharString(ch int) string {
 	var s string;
 	switch ch {
 	case '\a': s = `\a`;
 	case '\b': s = `\b`;
 	case '\f': s = `\f`;
 	case '\n': s = `\n`;
 	case '\r': s = `\r`;
 	case '\t': s = `\t`;
 	case '\v': s = `\v`;
 	case '\\': s = `\\`;
 	case '\'': s = `\'`;
 	default: s = string(ch);
 	}
 	return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")";
 }


 // Expect reports an error if the current character is not ch. The
 // current character is consumed in either case so that scanning
 // always makes progress.
 func (S *Scanner) Expect(ch int) {
 	found := S.ch;
 	if found != ch {
 		msg := "expected " + CharString(ch) + ", found " + CharString(found);
 		S.Error(S.chpos, msg);
 	}
 	S.Next();
 }


 // SkipWhitespace advances past all characters for which
 // is_whitespace holds.
 func (S *Scanner) SkipWhitespace() {
 	for {
 		if !is_whitespace(S.ch) {
 			return;
 		}
 		S.Next();
 	}
 }


 // SkipComment skips a comment whose leading '/' has already been
 // consumed. A line comment ends before the next newline (or at end
 // of file); a general comment ends after the closing "*/". An error
 // is reported at the position of the leading '/' if a general
 // comment is not closed before end of file.
 func (S *Scanner) SkipComment() {
 	// '/' already consumed
 	if S.ch != '/' {
 		/* comment */
 		start := S.chpos - 1;
 		S.Expect('*');
 		for S.ch >= 0 {
 			prev := S.ch;
 			S.Next();
 			if prev == '*' && S.ch == '/' {
 				S.Next();
 				return;
 			}
 		}
 		S.Error(start, "comment not terminated");
 		return;
 	}

 	// comment
 	S.Next();
 	for S.ch >= 0 && S.ch != '\n' {
 		S.Next();
 	}
 }


 // ScanIdentifier scans a run of letters and decimal digits starting
 // at the current character. It returns the source text as val and,
 // as tok, the keyword's token code if val is found in Keywords or
 // IDENT otherwise.
 func (S *Scanner) ScanIdentifier() (tok int, val string) {
 	start := S.chpos;
 	for {
 		c := S.ch;
 		if !is_letter(c) && digit_val(c) >= 10 {
 			break;
 		}
 		S.Next();
 	}
 	val = S.src[start : S.chpos];

 	var is_keyword bool;
 	tok, is_keyword = Keywords[val];
 	if is_keyword {
 		return tok, val;
 	}
 	return IDENT, val;
 }


 // ScanMantissa consumes characters for as long as they are digits
 // of the given base (at most 16).
 func (S *Scanner) ScanMantissa(base int) {
 	for {
 		if digit_val(S.ch) >= base {
 			return;
 		}
 		S.Next();
 	}
 }


 // ScanNumber scans an integer or floating-point literal and returns
 // its token code (INT or FLOAT) together with its source text.
 // seen_decimal_point indicates that a leading '.' has been consumed
 // already; in that case the literal is a FLOAT and its text includes
 // that '.'. Otherwise a leading "0x"/"0X" introduces a hexadecimal
 // int, a leading '0' an octal int unless a digit 8 or 9, a '.', or an
 // exponent follows (then the literal is a FLOAT), and anything else
 // a decimal int or float. No errors are reported here.
 func (S *Scanner) ScanNumber(seen_decimal_point bool) (tok int, val string) {
 	pos := S.chpos;
 	tok = INT;

 	if seen_decimal_point {
 		tok = FLOAT;
 		pos--;  // '.' is one byte
 		S.ScanMantissa(10);
 		goto exponent;
 	}

 	if S.ch == '0' {
 		// int or float
 		S.Next();
 		if S.ch == 'x' || S.ch == 'X' {
 			// hexadecimal int
 			S.Next();
 			S.ScanMantissa(16);
 		} else {
 			// octal int or float
 			S.ScanMantissa(8);
 			if digit_val(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' {
 				// float
 				tok = FLOAT;
 				goto mantissa;
 			}
 			// octal int
 		}
 		goto exit;
 	}

 mantissa:
 	// decimal int or float
 	S.ScanMantissa(10);

 	if S.ch == '.' {
 		// float
 		tok = FLOAT;
 		S.Next();
 		S.ScanMantissa(10)
 	}

 exponent:
 	if S.ch == 'e' || S.ch == 'E' {
 		// float
 		tok = FLOAT;
 		S.Next();
 		// optional sign of the exponent
 		if S.ch == '-' || S.ch == '+' {
 			S.Next();
 		}
 		S.ScanMantissa(10);
 	}

 exit:
 	return tok, S.src[pos : S.chpos];
 }


 // ScanDigits consumes up to n digits of the given base and reports
 // an "illegal char escape" error if fewer than n are present.
 // Scanning stops after n digits, so digits that merely follow a
 // complete escape sequence (as in "\x414") are left for the caller.
 func (S *Scanner) ScanDigits(n int, base int) {
 	for n > 0 && digit_val(S.ch) < base {
 		S.Next();
 		n--;
 	}
 	if n > 0 {
 		S.Error(S.chpos, "illegal char escape");
 	}
 }


 // ScanEscape scans the part of an escape sequence that follows the
 // backslash: a single escape letter, up to three octal digits, or
 // 'x', 'u', 'U' followed by 2, 4 or 8 hexadecimal digits. Any other
 // character is reported as an illegal char escape. For single-letter
 // escapes the letter itself is returned; in all other cases the
 // result is "" for now.
 func (S *Scanner) ScanEscape() string {
 	// TODO: fix this routine

 	esc := S.ch;
 	escpos := S.chpos;
 	S.Next();

 	var ndigits, base int;
 	switch esc {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
 		return string(esc);

 	case '0', '1', '2', '3', '4', '5', '6', '7':
 		ndigits, base = 3 - 1, 8;  // 1 char already read

 	case 'x':
 		ndigits, base = 2, 16;

 	case 'u':
 		ndigits, base = 4, 16;

 	case 'U':
 		ndigits, base = 8, 16;

 	default:
 		S.Error(escpos, "illegal char escape");
 		return "";  // TODO fix this
 	}

 	S.ScanDigits(ndigits, base);
 	return "";  // TODO fix this
 }


 // ScanChar scans a character literal whose opening quote has been
 // consumed already and returns its source text, quotes included.
 func (S *Scanner) ScanChar() string {
 	// '\'' already consumed

 	start := S.chpos - 1;  // the opening quote is one byte
 	escaped := S.ch == '\\';
 	S.Next();
 	if escaped {
 		S.ScanEscape();
 	}

 	S.Expect('\'');
 	return S.src[start : S.chpos];
 }


 // ScanString scans an interpreted string literal whose opening '"'
 // has been consumed already and returns its source text, quotes
 // included. A newline or end of file inside the literal is reported
 // as "string not terminated".
 //
 // NOTE(review): after such an error the final S.Next() still runs
 // and consumes one further character - confirm this is intended.
 func (S *Scanner) ScanString() string {
 	// '"' already consumed

 	start := S.chpos - 1;
 	ok := true;
 	for ok && S.ch != '"' {
 		c := S.ch;
 		S.Next();
 		if c < 0 || c == '\n' {
 			S.Error(start, "string not terminated");
 			ok = false;
 		}
 		if ok && c == '\\' {
 			S.ScanEscape();
 		}
 	}

 	S.Next();
 	return S.src[start : S.chpos];
 }


 // ScanRawString scans a raw string literal whose opening '`' has
 // been consumed already and returns its source text, back quotes
 // included. Escape sequences are not interpreted. A newline or end
 // of file inside the literal is reported as "string not terminated".
 func (S *Scanner) ScanRawString() string {
 	// '`' already consumed

 	start := S.chpos - 1;
 	ok := true;
 	for ok && S.ch != '`' {
 		c := S.ch;
 		S.Next();
 		if c < 0 || c == '\n' {
 			S.Error(start, "string not terminated");
 			ok = false;
 		}
 	}

 	S.Next();
 	return S.src[start : S.chpos];
 }


 // Select2 returns tok1 and consumes the current character if it is
 // '='; otherwise it returns tok0 and consumes nothing.
 func (S *Scanner) Select2(tok0, tok1 int) int {
 	if S.ch != '=' {
 		return tok0;
 	}
 	S.Next();
 	return tok1;
 }


 // Select3 chooses among three tokens based on the current character:
 // '=' selects tok1, ch2 selects tok2 (the character is consumed in
 // both cases), anything else selects tok0 and consumes nothing.
 func (S *Scanner) Select3(tok0, tok1, ch2, tok2 int) int {
 	c := S.ch;
 	if c != '=' && c != ch2 {
 		return tok0;
 	}
 	S.Next();
 	if c == '=' {
 		return tok1;
 	}
 	return tok2;
 }


 // Select4 chooses among four tokens: '=' selects tok1; ch2 followed
 // by '=' selects tok3; ch2 alone selects tok2; anything else selects
 // tok0. Exactly the characters belonging to the selected token are
 // consumed.
 func (S *Scanner) Select4(tok0, tok1, ch2, tok2, tok3 int) int {
 	c := S.ch;
 	if c == '=' {
 		S.Next();
 		return tok1;
 	}
 	if c != ch2 {
 		return tok0;
 	}
 	S.Next();
 	// ch2 seen: decide between tok2 and tok3 on a following '='
 	return S.Select2(tok2, tok3);
 }


 // Scan skips whitespace and comments and returns the next token:
 // its code tok, the position pos of its first character, and, for
 // identifiers, keywords, numbers, strings and character literals,
 // its source text val (val is empty for all other tokens). At end of
 // file tok is EOF. An unexpected character is reported via Error and
 // returned as ILLEGAL.
 func (S *Scanner) Scan() (tok, pos int, val string) {
 	S.SkipWhitespace();

 	ch := S.ch;
 	tok = ILLEGAL;
 	pos = S.chpos;

 	switch {
 	case is_letter(ch): tok, val = S.ScanIdentifier();
 	case digit_val(ch) < 10: tok, val = S.ScanNumber(false);
 	default:
 		// ch holds the character just consumed, S.ch the one after it
 		S.Next();  // always make progress
 		switch ch {
 		case -1: tok = EOF;
 		case '"': tok, val = STRING, S.ScanString();
 		// character literals are delivered as INT tokens
 		case '\'': tok, val = INT, S.ScanChar();
 		case '`': tok, val = STRING, S.ScanRawString();
 		case ':': tok = S.Select2(COLON, DEFINE);
 		case '.':
 			// a '.' directly followed by a digit starts a float
 			if digit_val(S.ch) < 10 {
 				tok, val = S.ScanNumber(true);
 			} else {
 				tok = PERIOD;
 			}
 		case ',': tok = COMMA;
 		case ';': tok = SEMICOLON;
 		case '(': tok = LPAREN;
 		case ')': tok = RPAREN;
 		case '[': tok = LBRACK;
 		case ']': tok = RBRACK;
 		case '{': tok = LBRACE;
 		case '}': tok = RBRACE;
 		case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
 		case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
 		case '*': tok = S.Select2(MUL, MUL_ASSIGN);
 		case '/':
 			if S.ch == '/' || S.ch == '*' {
 				// skip the comment and deliver the token after it
 				S.SkipComment();
 				// cannot simply return because of 6g bug
 				tok, pos, val = S.Scan();
 				return tok, pos, val;
 			}
 			tok = S.Select2(QUO, QUO_ASSIGN);
 		case '%': tok = S.Select2(REM, REM_ASSIGN);
 		case '^': tok = S.Select2(XOR, XOR_ASSIGN);
 		case '<':
 			if S.ch == '-' {
 				S.Next();
 				tok = ARROW;
 			} else {
 				tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
 			}
 		case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
 		case '=': tok = S.Select2(ASSIGN, EQL);
 		case '!': tok = S.Select2(NOT, NEQ);
 		case '&': tok = S.Select3(AND, AND_ASSIGN, '&', LAND);
 		case '|': tok = S.Select3(OR, OR_ASSIGN, '|', LOR);
 		default:
 			S.Error(pos, "illegal character " + CharString(ch));
 			tok = ILLEGAL;
 		}
 	}

 	return tok, pos, val;
 }


 // Token bundles the three results of Scan so that they can be sent
 // over a channel as a single value (see Server).
 export type Token struct {
 	pos int;  // position of the token's first character
 	tok int;  // token code
 	val string;  // source text as returned by Scan
 }


 // Server scans tokens until end of file and sends each one, packaged
 // as a freshly allocated Token, on channel c. The EOF token is sent
 // as well; Server returns right after it.
 func (S *Scanner) Server(c *chan *Token) {
 	done := false;
 	for !done {
 		t := new(Token);
 		t.tok, t.pos, t.val = S.Scan();
 		c <- t;
 		done = t.tok == EOF;
 	}
 }
	// Copyright 2009 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package Scanner

	import Platform "platform"
	import Utils "utils"


	// Token codes produced by the scanner. The values are consecutive
	// integers assigned via iota, so the order of the entries below is
	// significant: init() walks the range KEYWORDS_BEG..KEYWORDS_END to
	// build the keyword table.
	export const (
	// special tokens and literals
	ILLEGAL = iota;
	EOF;
	INT;
	FLOAT;
	STRING;

	// punctuation
	COMMA;
	COLON;
	SEMICOLON;
	PERIOD;

	// brackets
	LPAREN;
	RPAREN;
	LBRACK;
	RBRACK;
	LBRACE;
	RBRACE;

	// "=" and ":="
	ASSIGN;
	DEFINE;

	// "++", "--" and "!"
	INC;
	DEC;
	NOT;

	// bitwise operators
	AND;
	OR;
	XOR;

	// arithmetic operators
	ADD;
	SUB;
	MUL;
	QUO;
	REM;

	// comparison operators
	EQL;
	NEQ;
	LSS;
	LEQ;
	GTR;
	GEQ;

	// shift operators
	SHL;
	SHR;

	// "<-"
	ARROW;

	// arithmetic assignment operators
	ADD_ASSIGN;
	SUB_ASSIGN;
	MUL_ASSIGN;
	QUO_ASSIGN;
	REM_ASSIGN;

	// bitwise assignment operators
	AND_ASSIGN;
	OR_ASSIGN;
	XOR_ASSIGN;

	// shift assignment operators
	SHL_ASSIGN;
	SHR_ASSIGN;

	// logical and / logical or
	LAND;
	LOR;

	// IDENT must be immediately before keywords
	IDENT;

	// keywords
	// KEYWORDS_BEG and KEYWORDS_END only delimit the keyword range;
	// TokenName has no spelling for either of them.
	KEYWORDS_BEG;
	BREAK;
	CASE;
	CHAN;
	CONST;
	CONTINUE;
	DEFAULT;
	ELSE;
	EXPORT;
	FALLTHROUGH;
	FALSE;
	FOR;
	FUNC;
	GO;
	GOTO;
	IF;
	IMPORT;
	INTERFACE;
	IOTA;
	MAP;
	NEW;
	NIL;
	PACKAGE;
	RANGE;
	RETURN;
	SELECT;
	STRUCT;
	SWITCH;
	TRUE;
	TYPE;
	VAR;
	KEYWORDS_END;
	)


	// Keywords maps a keyword's spelling to its token code. It is filled
	// in by init() and consulted by ScanIdentifier.
	var Keywords *map [string] int;
	// VerboseMsgs makes Error print the column in addition to the line.
	var VerboseMsgs bool; // error message customization


	export func TokenName(tok int) string {
	switch (tok) {
	case ILLEGAL: return "illegal";
	case EOF: return "eof";
	case INT: return "int";
	case FLOAT: return "float";
	case STRING: return "string";

	case COMMA: return ",";
	case COLON: return ":";
	case SEMICOLON: return ";";
	case PERIOD: return ".";

	case LPAREN: return "(";
	case RPAREN: return ")";
	case LBRACK: return "[";
	case RBRACK: return "]";
	case LBRACE: return "LBRACE";
	case RBRACE: return "RBRACE";

	case ASSIGN: return "=";
	case DEFINE: return ":=";

	case INC: return "++";
	case DEC: return "--";
	case NOT: return "!";

	case AND: return "&";
	case OR: return "\|";
	case XOR: return "^";

	case ADD: return "+";
	case SUB: return "-";
	case MUL: return "*";
	case QUO: return "/";
	case REM: return "%";

	case EQL: return "==";
	case NEQ: return "!=";
	case LSS: return "<";
	case LEQ: return "<=";
	case GTR: return ">";
	case GEQ: return ">=";

	case SHL: return "<<";
	case SHR: return ">>";

	case ARROW: return "<-";

	case ADD_ASSIGN: return "+=";
	case SUB_ASSIGN: return "-=";
	case MUL_ASSIGN: return "+=";
	case QUO_ASSIGN: return "/=";
	case REM_ASSIGN: return "%=";

	case AND_ASSIGN: return "&=";
	case OR_ASSIGN: return "\|=";
	case XOR_ASSIGN: return "^=";

	case SHL_ASSIGN: return "<<=";
	case SHR_ASSIGN: return ">>=";

	case LAND: return "&&";
	case LOR: return "\|\|";

	case IDENT: return "ident";

	case BREAK: return "break";
	case CASE: return "case";
	case CHAN: return "chan";
	case CONST: return "const";
	case CONTINUE: return "continue";
	case DEFAULT: return "default";
	case ELSE: return "else";
	case EXPORT: return "export";
	case FALLTHROUGH: return "fallthrough";
	case FALSE: return "false";
	case FOR: return "for";
	case FUNC: return "func";
	case GO: return "go";
	case GOTO: return "goto";
	case IF: return "if";
	case IMPORT: return "import";
	case INTERFACE: return "interface";
	case IOTA: return "iota";
	case MAP: return "map";
	case NEW: return "new";
	case NIL: return "nil";
	case PACKAGE: return "package";
	case RANGE: return "range";
	case RETURN: return "return";
	case SELECT: return "select";
	case STRUCT: return "struct";
	case SWITCH: return "switch";
	case TRUE: return "true";
	case TYPE: return "type";
	case VAR: return "var";
	}

	return "???";
	}


	// init builds the keyword lookup table and selects the error message
	// format.
	func init() {
		Keywords = new(map [string] int);

		// KEYWORDS_BEG and KEYWORDS_END are range markers without a
		// spelling (TokenName yields "???" for both), so only the codes
		// strictly between them are entered into the table.
		for i := KEYWORDS_BEG + 1; i < KEYWORDS_END; i++ {
			Keywords[TokenName(i)] = i;
		}

		// Provide column information in error messages for gri only...
		VerboseMsgs = Platform.USER == "gri";
	}


	// is_whitespace reports whether ch is a blank, a carriage return, a
	// newline or a horizontal tab.
	func is_whitespace(ch int) bool {
		return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t';
	}


	// is_letter reports whether ch may appear in an identifier as a
	// letter: an ASCII letter, an underscore, or any value >= 128.
	func is_letter(ch int) bool {
		return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 128;
	}


	// digit_val returns the numeric value of ch read as a hexadecimal
	// digit (0..15). Any other character yields 16, so that the test
	// digit_val(ch) < base fails for every base up to 16.
	func digit_val(ch int) int {
		switch {
		case '0' <= ch && ch <= '9':
			return ch - '0';
		case 'a' <= ch && ch <= 'f':
			return 10 + (ch - 'a');
		case 'A' <= ch && ch <= 'F':
			return 10 + (ch - 'A');
		}
		return 16; // not a digit: larger than any legal digit val
	}


	// Scanner holds the state needed to tokenize one source text: the
	// error bookkeeping used by Error and the read position with a one
	// character look-ahead maintained by Next. Open initializes all
	// fields.
	export type Scanner struct {
	filename string; // error reporting only
	nerrors int; // number of errors
	errpos int; // last error position

	src string; // scanned source
	pos int; // current reading position
	ch int; // one char look-ahead
	chpos int; // position of ch
	}


	// Read the next Unicode char into S.ch.
	// S.ch < 0 means end-of-file.
	//
	func (S *Scanner) Next() {
	const (
	Bit1 = 7;
	Bitx = 6;
	Bit2 = 5;
	Bit3 = 4;
	Bit4 = 3;

	T1 = (1 << (Bit1 + 1) - 1) ^ 0xFF; // 0000 0000
	Tx = (1 << (Bitx + 1) - 1) ^ 0xFF; // 1000 0000
	T2 = (1 << (Bit2 + 1) - 1) ^ 0xFF; // 1100 0000
	T3 = (1 << (Bit3 + 1) - 1) ^ 0xFF; // 1110 0000
	T4 = (1 << (Bit4 + 1) - 1) ^ 0xFF; // 1111 0000

	Rune1 = 1 << (Bit1 + 0*Bitx) - 1; // 0000 0000 0111 1111
	Rune2 = 1 << (Bit2 + 1*Bitx) - 1; // 0000 0111 1111 1111
	Rune3 = 1 << (Bit3 + 2*Bitx) - 1; // 1111 1111 1111 1111

	Maskx = 0x3F; // 1 << Bitx - 1; // 0011 1111
	Testx = 0xC0; // Maskx ^ 0xFF; // 1100 0000

	Bad = 0xFFFD; // Runeerror
	);

	src := S.src;
	lim := len(src);
	pos := S.pos;

	// 1-byte sequence
	// 0000-007F => T1
	if pos >= lim {
	S.ch = -1; // end of file
	S.chpos = lim;
	return;
	}
	c0 := int(src[pos]);
	pos++;
	if c0 < Tx {
	S.ch = c0;
	S.chpos = S.pos;
	S.pos = pos;
	return;
	}

	// 2-byte sequence
	// 0080-07FF => T2 Tx
	if pos >= lim {
	goto bad;
	}
	c1 := int(src[pos]) ^ Tx;
	pos++;
	if c1 & Testx != 0 {
	goto bad;
	}
	if c0 < T3 {
	if c0 < T2 {
	goto bad;
	}
	r := (c0 << Bitx \| c1) & Rune2;
	if r <= Rune1 {
	goto bad;
	}
	S.ch = r;
	S.chpos = S.pos;
	S.pos = pos;
	return;
	}

	// 3-byte sequence
	// 0800-FFFF => T3 Tx Tx
	if pos >= lim {
	goto bad;
	}
	c2 := int(src[pos]) ^ Tx;
	pos++;
	if c2 & Testx != 0 {
	goto bad;
	}
	if c0 < T4 {
	r := (((c0 << Bitx \| c1) << Bitx) \| c2) & Rune3;
	if r <= Rune2 {
	goto bad;
	}
	S.ch = r;
	S.chpos = S.pos;
	S.pos = pos;
	return;
	}

	// bad encoding
	bad:
	S.ch = Bad;
	S.chpos = S.pos;
	S.pos += 1;
	return;
	}


	// Compute (line, column) information for a given source position.
	//
	// pos is a byte offset into S.src; values beyond the end of the
	// source are clamped to len(S.src). Lines and columns are both
	// 1-based, and the column is counted in bytes from the start of the
	// line. (Previously the first line was reported with 0-based columns
	// while all following lines used 1-based columns.)
	func (S *Scanner) LineCol(pos int) (line, col int) {
		line = 1;
		// offset of the newline that precedes the current line; -1 acts
		// as a virtual newline in front of the first line
		lpos := -1;

		src := S.src;
		if pos > len(src) {
			pos = len(src);
		}

		for i := 0; i < pos; i++ {
			if src[i] == '\n' {
				line++;
				lpos = i;
			}
		}

		return line, pos - lpos;
	}


	func (S *Scanner) Error(pos int, msg string) {
	const errdist = 10;
	delta := pos - S.errpos; // may be negative!
	if delta < 0 {
	delta = -delta;
	}
	if delta > errdist \|\| S.nerrors == 0 /* always report first error */ {
	print(S.filename);
	if pos >= 0 {
	// print position
	line, col := S.LineCol(pos);
	if VerboseMsgs {
	print(":", line, ":", col);
	} else {
	print(":", line);
	}
	}
	print(": ", msg, "\n");
	S.nerrors++;
	S.errpos = pos;
	}

	if S.nerrors >= 10 {
	sys.exit(1);
	}
	}


	// Open prepares S for scanning src. filename is only used in error
	// messages. The error state is cleared and the first character is
	// read so that S.ch holds valid look-ahead.
	func (S *Scanner) Open(filename, src string) {
		// source text and reading position
		S.src, S.pos = src, 0;

		// error bookkeeping
		S.filename = filename;
		S.nerrors, S.errpos = 0, 0;

		// prime the one char look-ahead
		S.Next();
	}


	// CharString formats ch for use in error messages: the character in
	// single quotes (control characters, backslash and single quote are
	// shown as escape sequences) followed by its value in base 16, as in
	// 'x' (U+78).
	func CharString(ch int) string {
		var s string;
		switch ch {
		case '\a': s = `\a`;
		case '\b': s = `\b`;
		case '\f': s = `\f`;
		case '\n': s = `\n`;
		case '\r': s = `\r`;
		case '\t': s = `\t`;
		case '\v': s = `\v`;
		case '\\': s = `\\`;
		case '\'': s = `\'`;
		default: s = string(ch);
		}
		return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")";
	}


	// Expect reports an error if the current character is not ch. The
	// current character is consumed in either case so that scanning
	// always makes progress.
	func (S *Scanner) Expect(ch int) {
		found := S.ch;
		if found != ch {
			msg := "expected " + CharString(ch) + ", found " + CharString(found);
			S.Error(S.chpos, msg);
		}
		S.Next();
	}


	// SkipWhitespace advances past all characters for which
	// is_whitespace holds.
	func (S *Scanner) SkipWhitespace() {
		for {
			if !is_whitespace(S.ch) {
				return;
			}
			S.Next();
		}
	}


	// SkipComment skips a comment whose leading '/' has already been
	// consumed. A line comment ends before the next newline (or at end
	// of file); a general comment ends after the closing "*/". An error
	// is reported at the position of the leading '/' if a general
	// comment is not closed before end of file.
	func (S *Scanner) SkipComment() {
		// '/' already consumed
		if S.ch != '/' {
			/* comment */
			start := S.chpos - 1;
			S.Expect('*');
			for S.ch >= 0 {
				prev := S.ch;
				S.Next();
				if prev == '*' && S.ch == '/' {
					S.Next();
					return;
				}
			}
			S.Error(start, "comment not terminated");
			return;
		}

		// comment
		S.Next();
		for S.ch >= 0 && S.ch != '\n' {
			S.Next();
		}
	}


	func (S *Scanner) ScanIdentifier() (tok int, val string) {
	pos := S.chpos;
	for is_letter(S.ch) \|\| digit_val(S.ch) < 10 {
	S.Next();
	}
	val = S.src[pos : S.chpos];

	var present bool;
	tok, present = Keywords[val];
	if !present {
	tok = IDENT;
	}

	return tok, val;
	}


	// ScanMantissa consumes characters for as long as they are digits
	// of the given base (at most 16).
	func (S *Scanner) ScanMantissa(base int) {
		for {
			if digit_val(S.ch) >= base {
				return;
			}
			S.Next();
		}
	}


	func (S *Scanner) ScanNumber(seen_decimal_point bool) (tok int, val string) {
	pos := S.chpos;
	tok = INT;

	if seen_decimal_point {
	tok = FLOAT;
	pos--; // '.' is one byte
	S.ScanMantissa(10);
	goto exponent;
	}

	if S.ch == '0' {
	// int or float
	S.Next();
	if S.ch == 'x' \|\| S.ch == 'X' {
	// hexadecimal int
	S.Next();
	S.ScanMantissa(16);
	} else {
	// octal int or float
	S.ScanMantissa(8);
	if digit_val(S.ch) < 10 \|\| S.ch == '.' \|\| S.ch == 'e' \|\| S.ch == 'E' {
	// float
	tok = FLOAT;
	goto mantissa;
	}
	// octal int
	}
	goto exit;
	}

	mantissa:
	// decimal int or float
	S.ScanMantissa(10);

	if S.ch == '.' {
	// float
	tok = FLOAT;
	S.Next();
	S.ScanMantissa(10)
	}

	exponent:
	if S.ch == 'e' \|\| S.ch == 'E' {
	// float
	tok = FLOAT;
	S.Next();
	if S.ch == '-' \|\| S.ch == '+' {
	S.Next();
	}
	S.ScanMantissa(10);
	}

	exit:
	return tok, S.src[pos : S.chpos];
	}


	// ScanDigits consumes up to n digits of the given base and reports
	// an "illegal char escape" error if fewer than n are present.
	// Scanning stops after n digits, so digits that merely follow a
	// complete escape sequence (as in "\x414") are left for the caller.
	func (S *Scanner) ScanDigits(n int, base int) {
		for n > 0 && digit_val(S.ch) < base {
			S.Next();
			n--;
		}
		if n > 0 {
			S.Error(S.chpos, "illegal char escape");
		}
	}


	// ScanEscape scans the part of an escape sequence that follows the
	// backslash: a single escape letter, up to three octal digits, or
	// 'x', 'u', 'U' followed by 2, 4 or 8 hexadecimal digits. Any other
	// character is reported as an illegal char escape. For single-letter
	// escapes the letter itself is returned; in all other cases the
	// result is "" for now.
	func (S *Scanner) ScanEscape() string {
		// TODO: fix this routine

		esc := S.ch;
		escpos := S.chpos;
		S.Next();

		var ndigits, base int;
		switch esc {
		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
			return string(esc);

		case '0', '1', '2', '3', '4', '5', '6', '7':
			ndigits, base = 3 - 1, 8; // 1 char already read

		case 'x':
			ndigits, base = 2, 16;

		case 'u':
			ndigits, base = 4, 16;

		case 'U':
			ndigits, base = 8, 16;

		default:
			S.Error(escpos, "illegal char escape");
			return ""; // TODO fix this
		}

		S.ScanDigits(ndigits, base);
		return ""; // TODO fix this
	}


	// ScanChar scans a character literal whose opening quote has been
	// consumed already and returns its source text, quotes included.
	func (S *Scanner) ScanChar() string {
		// '\'' already consumed

		start := S.chpos - 1; // the opening quote is one byte
		escaped := S.ch == '\\';
		S.Next();
		if escaped {
			S.ScanEscape();
		}

		S.Expect('\'');
		return S.src[start : S.chpos];
	}


	func (S *Scanner) ScanString() string {
	// '"' already consumed

	pos := S.chpos - 1;
	for S.ch != '"' {
	ch := S.ch;
	S.Next();
	if ch == '\n' \|\| ch < 0 {
	S.Error(pos, "string not terminated");
	break;
	}
	if ch == '\\' {
	S.ScanEscape();
	}
	}

	S.Next();
	return S.src[pos : S.chpos];
	}


	func (S *Scanner) ScanRawString() string {
	// '`' already consumed

	pos := S.chpos - 1;
	for S.ch != '`' {
	ch := S.ch;
	S.Next();
	if ch == '\n' \|\| ch < 0 {
	S.Error(pos, "string not terminated");
	break;
	}
	}

	S.Next();
	return S.src[pos : S.chpos];
	}


	// Select2 returns tok1 and consumes the current character if it is
	// '='; otherwise it returns tok0 and consumes nothing.
	func (S *Scanner) Select2(tok0, tok1 int) int {
		if S.ch != '=' {
			return tok0;
		}
		S.Next();
		return tok1;
	}


	// Select3 chooses among three tokens based on the current character:
	// '=' selects tok1, ch2 selects tok2 (the character is consumed in
	// both cases), anything else selects tok0 and consumes nothing.
	func (S *Scanner) Select3(tok0, tok1, ch2, tok2 int) int {
		c := S.ch;
		if c != '=' && c != ch2 {
			return tok0;
		}
		S.Next();
		if c == '=' {
			return tok1;
		}
		return tok2;
	}


	// Select4 chooses among four tokens: '=' selects tok1; ch2 followed
	// by '=' selects tok3; ch2 alone selects tok2; anything else selects
	// tok0. Exactly the characters belonging to the selected token are
	// consumed.
	func (S *Scanner) Select4(tok0, tok1, ch2, tok2, tok3 int) int {
		c := S.ch;
		if c == '=' {
			S.Next();
			return tok1;
		}
		if c != ch2 {
			return tok0;
		}
		S.Next();
		// ch2 seen: decide between tok2 and tok3 on a following '='
		return S.Select2(tok2, tok3);
	}


	func (S *Scanner) Scan() (tok, pos int, val string) {
	S.SkipWhitespace();

	ch := S.ch;
	tok = ILLEGAL;
	pos = S.chpos;

	switch {
	case is_letter(ch): tok, val = S.ScanIdentifier();
	case digit_val(ch) < 10: tok, val = S.ScanNumber(false);
	default:
	S.Next(); // always make progress
	switch ch {
	case -1: tok = EOF;
	case '"': tok, val = STRING, S.ScanString();
	case '\'': tok, val = INT, S.ScanChar();
	case '`': tok, val = STRING, S.ScanRawString();
	case ':': tok = S.Select2(COLON, DEFINE);
	case '.':
	if digit_val(S.ch) < 10 {
	tok, val = S.ScanNumber(true);
	} else {
	tok = PERIOD;
	}
	case ',': tok = COMMA;
	case ';': tok = SEMICOLON;
	case '(': tok = LPAREN;
	case ')': tok = RPAREN;
	case '[': tok = LBRACK;
	case ']': tok = RBRACK;
	case '{': tok = LBRACE;
	case '}': tok = RBRACE;
	case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
	case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
	case '*': tok = S.Select2(MUL, MUL_ASSIGN);
	case '/':
	if S.ch == '/' \|\| S.ch == '*' {
	S.SkipComment();
	// cannot simply return because of 6g bug
	tok, pos, val = S.Scan();
	return tok, pos, val;
	}
	tok = S.Select2(QUO, QUO_ASSIGN);
	case '%': tok = S.Select2(REM, REM_ASSIGN);
	case '^': tok = S.Select2(XOR, XOR_ASSIGN);
	case '<':
	if S.ch == '-' {
	S.Next();
	tok = ARROW;
	} else {
	tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
	}
	case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
	case '=': tok = S.Select2(ASSIGN, EQL);
	case '!': tok = S.Select2(NOT, NEQ);
	case '&': tok = S.Select3(AND, AND_ASSIGN, '&', LAND);
	case '\|': tok = S.Select3(OR, OR_ASSIGN, '\|', LOR);
	default:
	S.Error(pos, "illegal character " + CharString(ch));
	tok = ILLEGAL;
	}
	}

	return tok, pos, val;
	}


	// Token bundles the three results of Scan so that they can be sent
	// over a channel as a single value (see Server).
	export type Token struct {
	pos int; // position of the token's first character
	tok int; // token code
	val string; // source text as returned by Scan
	}


	// Server scans tokens until end of file and sends each one, packaged
	// as a freshly allocated Token, on channel c. The EOF token is sent
	// as well; Server returns right after it.
	//
	// The receiver and the channel are pointers, like the receivers of
	// all other Scanner methods and the *map used for Keywords; with a
	// value receiver the scanning progress would be made on a copy of
	// the caller's Scanner.
	func (S *Scanner) Server(c *chan *Token) {
		for {
			t := new(Token);
			t.tok, t.pos, t.val = S.Scan();
			c <- t;
			if t.tok == EOF {
				break;
			}
		}
	}