compiler: read embedcfg files, parse go:embed directives

This change reads go:embed directives and attaches them to variables.
We still don't do anything with the directives.

This change also reads the file passed in the -fgo-embedcfg option.

Change-Id: I405e02682500aec12ea6c99423a9bf13c36f9f6a
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/281533
Trust: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/go/embed.cc b/go/embed.cc
index 19c6930..7ee8674 100644
--- a/go/embed.cc
+++ b/go/embed.cc
@@ -626,3 +626,18 @@
 		"%<-fgo-embedcfg%>: %s: %s",
 		this->filename_, msg);
 }
+
+// Return whether the current file imports the "embed" package.
+
+bool
+Gogo::is_embed_imported() const
+{
+  Packages::const_iterator p = this->packages_.find("embed");
+  if (p == this->packages_.end())
+    return false;  // No package entry is keyed "embed" at all.
+
+  // Imports made by the current file are tracked as aliases on the
+  // package, where a typical import just lists the package name.  So
+  // "embed" counts as imported only if it has at least one alias.
+  return !p->second->aliases().empty();
+}
diff --git a/go/go.cc b/go/go.cc
index e026d65..404cb12 100644
--- a/go/go.cc
+++ b/go/go.cc
@@ -40,6 +40,8 @@
     ::gogo->set_compiling_runtime(args->compiling_runtime);
   if (args->c_header != NULL)
     ::gogo->set_c_header(args->c_header);
+  if (args->embedcfg != NULL)
+    ::gogo->read_embedcfg(args->embedcfg);
   ::gogo->set_debug_escape_level(args->debug_escape_level);
   if (args->debug_escape_hash != NULL)
     ::gogo->set_debug_escape_hash(args->debug_escape_hash);
diff --git a/go/gogo.cc b/go/gogo.cc
index fbf8935..4c795a2 100644
--- a/go/gogo.cc
+++ b/go/gogo.cc
@@ -7456,8 +7456,8 @@
 		   bool is_parameter, bool is_receiver,
 		   Location location)
   : type_(type), init_(init), preinit_(NULL), location_(location),
-    backend_(NULL), is_global_(is_global), is_parameter_(is_parameter),
-    is_closure_(false), is_receiver_(is_receiver),
+    embeds_(NULL), backend_(NULL), is_global_(is_global),
+    is_parameter_(is_parameter), is_closure_(false), is_receiver_(is_receiver),
     is_varargs_parameter_(false), is_global_sink_(false), is_used_(false),
     is_address_taken_(false), is_non_escaping_address_taken_(false),
     seen_(false), init_is_lowered_(false), init_is_flattened_(false),
diff --git a/go/gogo.h b/go/gogo.h
index 0d80bde..891ef69 100644
--- a/go/gogo.h
+++ b/go/gogo.h
@@ -397,6 +397,10 @@
   void
   read_embedcfg(const char* filename);
 
+  // Return whether the current file imports "embed".
+  bool
+  is_embed_imported() const;
+
   // Return whether to check for division by zero in binary operations.
   bool
   check_divide_by_zero() const
@@ -2276,6 +2280,16 @@
     this->is_referenced_by_inline_ = true;
   }
 
+  // Attach go:embed patterns; asserts a global with no init or preinit.
+  void
+  set_embeds(std::vector<std::string>* embeds)
+  {
+    go_assert(this->is_global_
+	      && this->init_ == NULL
+	      && this->preinit_ == NULL);
+    this->embeds_ = embeds;  // Keeps the pointer; the vector is not copied.
+  }
+
   // Return the top-level declaration for this variable.
   Statement*
   toplevel_decl()
@@ -2346,6 +2360,8 @@
   Block* preinit_;
   // Location of variable definition.
   Location location_;
+  // Any associated go:embed comments.
+  std::vector<std::string>* embeds_;
   // Backend representation.
   Bvariable* backend_;
   // Whether this is a global variable.
diff --git a/go/lex.cc b/go/lex.cc
index 0baf4e4..dd66c02 100644
--- a/go/lex.cc
+++ b/go/lex.cc
@@ -2035,6 +2035,8 @@
 	  (*this->linknames_)[go_name] = Linkname(ext_name, is_exported, loc);
 	}
     }
+  else if (verb == "go:embed")
+    this->gather_embed(ps, pend);
   else if (verb == "go:nointerface")
     {
       // For field tracking analysis: a //go:nointerface comment means
@@ -2111,6 +2113,98 @@
     }
 }
 
+// Read a go:embed directive in [P, PEND): space-separated patterns, each
+// optionally quoted or backquoted.  Each is appended to embeds_.
+
+void
+Lex::gather_embed(const char *p, const char *pend)
+{
+  while (true)
+    {
+      // Skip spaces to find the start of the next pattern.  We do a
+      // fast skip of space and tab, but we also permit and skip
+      // Unicode space characters.
+      while (p < pend && (*p == ' ' || *p == '\t'))
+	++p;
+      if (p >= pend)
+	break;
+      unsigned int c;
+      bool issued_error;
+      const char *pnext = this->advance_one_utf8_char(p, &c, &issued_error);
+      if (issued_error)
+	return;
+      if (Lex::is_unicode_space(c))
+	{
+	  p = pnext;
+	  continue;
+	}
+
+      // Here P points to the start of the next pattern, PNEXT points
+      // to the second character in the pattern, and C is the first
+      // character in that pattern (the character to which P points).
+
+      if (c == '"' || c == '`')
+	{
+	  Location loc = this->location();
+	  const unsigned char quote = c;
+	  std::string value;
+	  p = pnext;
+	  while (p < pend && *p != quote)
+	    {
+	      bool is_character;
+	      if (quote == '"')
+		p = this->advance_one_char(p, false, &c, &is_character);
+	      else
+		{
+		  p = this->advance_one_utf8_char(p, &c, &issued_error);
+		  if (issued_error)
+		    return;
+		  // "Carriage return characters ('\r') inside raw string
+		  // literals are discarded from the raw string value."
+		  if (c == '\r')
+		    continue;
+		  is_character = true;
+		}
+	      Lex::append_char(c, is_character, &value, loc);
+	    }
+	  if (p >= pend)
+	    {
+	      // Note that within a go:embed directive we do not
+	      // permit raw strings to cross multiple lines.
+	      go_error_at(loc, "unterminated string");
+	      return;  // Patterns already gathered stay in embeds_.
+	    }
+	  this->embeds_.push_back(value);
+	  ++p;  // Step past the closing quote.
+	}
+      else
+	{
+	  const char *start = p;  // Unquoted: runs to the next space or PEND.
+	  p = pnext;
+	  while (p < pend)
+	    {
+	      c = *p;
+	      if (c == ' ' || c == '\t')
+		break;
+	      if (c > ' ' && c <= 0x7f)
+		{
+		  // ASCII non-space character.
+		  ++p;
+		  continue;
+		}
+	      pnext = this->advance_one_utf8_char(p, &c, &issued_error);
+	      if (issued_error)
+		return;
+	      if (Lex::is_unicode_space(c))
+		break;  // P is left pointing at the space.
+	      p = pnext;
+	    }
+
+	  this->embeds_.push_back(std::string(start, p - start));
+	}
+    }
+}
+
 // The Unicode tables use this struct.
 
 struct Unicode_range
diff --git a/go/lex.h b/go/lex.h
index 3be3806..75c8429 100644
--- a/go/lex.h
+++ b/go/lex.h
@@ -405,6 +405,21 @@
     return ret;
   }
 
+  // Return whether embeds_ holds any go:embed patterns not yet taken.
+  bool
+  has_embeds() const
+  { return !this->embeds_.empty(); }
+
+  // Move the go:embed patterns seen so far into *EMBEDS, leaving the
+  // saved set empty.  *EMBEDS must be empty on entry (asserted); the
+  // exchange is a std::swap, so the strings are not copied.
+  void
+  get_and_clear_embeds(std::vector<std::string>* embeds)
+  {
+    go_assert(embeds->empty());
+    std::swap(*embeds, this->embeds_);
+  }
+
   // Return whether the identifier NAME should be exported.  NAME is a
   // mangled name which includes only ASCII characters.
   static bool
@@ -536,6 +551,9 @@
   void
   skip_cpp_comment();
 
+  void
+  gather_embed(const char*, const char*);
+
   // The input file name.
   const char* input_file_name_;
   // The input file.
@@ -561,6 +579,8 @@
   std::string extern_;
   // The list of //go:linkname comments, if any.
   Linknames* linknames_;
+  // The list of //go:embed patterns, if any.
+  std::vector<std::string> embeds_;
 };
 
 #endif // !defined(GO_LEX_H)
diff --git a/go/parse.cc b/go/parse.cc
index 1664fe3..fd81a85 100644
--- a/go/parse.cc
+++ b/go/parse.cc
@@ -1315,12 +1315,36 @@
     go_warning_at(token->location(), 0,
 		  "ignoring magic comment before non-function");
 
+  std::vector<std::string>* embeds = NULL;
+  if (this->lex_->has_embeds())
+    {
+      embeds = new(std::vector<std::string>);
+      this->lex_->get_and_clear_embeds(embeds);
+
+      if (!this->gogo_->is_embed_imported())
+	{
+	  go_error_at(token->location(),
+		      "invalid go:embed: missing import %<embed%>");
+	  delete embeds;
+	  embeds = NULL;
+	}
+      if (!token->is_keyword(KEYWORD_VAR))
+	{
+	  go_error_at(token->location(), "misplaced go:embed directive");
+	  if (embeds != NULL)
+	    {
+	      delete embeds;
+	      embeds = NULL;
+	    }
+	}
+    }
+
   if (token->is_keyword(KEYWORD_CONST))
     this->const_decl();
   else if (token->is_keyword(KEYWORD_TYPE))
     this->type_decl(pragmas);
   else if (token->is_keyword(KEYWORD_VAR))
-    this->var_decl();
+    this->var_decl(embeds);
   else if (token->is_keyword(KEYWORD_FUNC))
     this->function_decl(pragmas);
   else
@@ -1343,8 +1367,8 @@
 // Decl<P> = P | "(" [ List<P> ] ")" .
 
 void
-Parse::decl(void (Parse::*pfn)(void*, unsigned int), void* varg,
-	    unsigned int pragmas)
+Parse::decl(void (Parse::*pfn)(unsigned int, std::vector<std::string>*),
+	    unsigned int pragmas, std::vector<std::string>* embeds)
 {
   if (this->peek_token()->is_eof())
     {
@@ -1354,15 +1378,18 @@
     }
 
   if (!this->peek_token()->is_op(OPERATOR_LPAREN))
-    (this->*pfn)(varg, pragmas);
+    (this->*pfn)(pragmas, embeds);
   else
     {
       if (pragmas != 0)
 	go_warning_at(this->location(), 0,
 		      "ignoring magic %<//go:...%> comment before group");
+      if (embeds != NULL)
+	go_error_at(this->location(),
+		    "ignoring %<//go:embed%> comment before group");
       if (!this->advance_token()->is_op(OPERATOR_RPAREN))
 	{
-	  this->list(pfn, varg, true);
+	  this->list(pfn, true);
 	  if (!this->peek_token()->is_op(OPERATOR_RPAREN))
 	    {
 	      go_error_at(this->location(), "missing %<)%>");
@@ -1383,10 +1410,10 @@
 // might follow.  This is either a '}' or a ')'.
 
 void
-Parse::list(void (Parse::*pfn)(void*, unsigned int), void* varg,
+Parse::list(void (Parse::*pfn)(unsigned int, std::vector<std::string>*),
 	    bool follow_is_paren)
 {
-  (this->*pfn)(varg, 0);
+  (this->*pfn)(0, NULL);
   Operator follow = follow_is_paren ? OPERATOR_RPAREN : OPERATOR_RCURLY;
   while (this->peek_token()->is_op(OPERATOR_SEMICOLON)
 	 || this->peek_token()->is_op(OPERATOR_COMMA))
@@ -1395,7 +1422,7 @@
 	go_error_at(this->location(), "unexpected comma");
       if (this->advance_token()->is_op(follow))
 	break;
-      (this->*pfn)(varg, 0);
+      (this->*pfn)(0, NULL);
     }
 }
 
@@ -1522,13 +1549,13 @@
 {
   go_assert(this->peek_token()->is_keyword(KEYWORD_TYPE));
   this->advance_token();
-  this->decl(&Parse::type_spec, NULL, pragmas);
+  this->decl(&Parse::type_spec, pragmas, NULL);
 }
 
 // TypeSpec = identifier ["="] Type .
 
 void
-Parse::type_spec(void*, unsigned int pragmas)
+Parse::type_spec(unsigned int pragmas, std::vector<std::string>*)
 {
   const Token* token = this->peek_token();
   if (!token->is_identifier())
@@ -1622,27 +1649,42 @@
 // VarDecl = "var" Decl<VarSpec> .
 
 void
-Parse::var_decl()
+Parse::var_decl(std::vector<std::string>* embeds)
 {
   go_assert(this->peek_token()->is_keyword(KEYWORD_VAR));
   this->advance_token();
-  this->decl(&Parse::var_spec, NULL, 0);
+  this->decl(&Parse::var_spec, 0, embeds);
 }
 
 // VarSpec = IdentifierList
 //             ( CompleteType [ "=" ExpressionList ] | "=" ExpressionList ) .
 
 void
-Parse::var_spec(void*, unsigned int pragmas)
+Parse::var_spec(unsigned int pragmas, std::vector<std::string>* embeds)
 {
+  Location loc = this->location();
+
   if (pragmas != 0)
-    go_warning_at(this->location(), 0,
-		  "ignoring magic %<//go:...%> comment before var");
+    go_warning_at(loc, 0, "ignoring magic %<//go:...%> comment before var");
 
   // Get the variable names.
   Typed_identifier_list til;
   this->identifier_list(&til);
 
+  if (embeds != NULL)
+    {
+      if (!this->gogo_->in_global_scope())
+	{
+	  go_error_at(loc, "go:embed only permitted at package scope");
+	  embeds = NULL;
+	}
+      if (til.size() > 1)
+	{
+	  go_error_at(loc, "go:embed cannot apply to multiple vars");
+	  embeds = NULL;
+	}
+    }
+
   Location location = this->location();
 
   Type* type = NULL;
@@ -1670,7 +1712,13 @@
       init = this->expression_list(NULL, false, true);
     }
 
-  this->init_vars(&til, type, init, false, location);
+  if (embeds != NULL && init != NULL)
+    {
+      go_error_at(loc, "go:embed cannot apply to var with initializer");
+      embeds = NULL;
+    }
+
+  this->init_vars(&til, type, init, false, embeds, location);
 
   if (init != NULL)
     delete init;
@@ -1683,11 +1731,12 @@
 void
 Parse::init_vars(const Typed_identifier_list* til, Type* type,
 		 Expression_list* init, bool is_coloneq,
-		 Location location)
+		 std::vector<std::string>* embeds, Location location)
 {
   // Check for an initialization which can yield multiple values.
   if (init != NULL && init->size() == 1 && til->size() > 1)
     {
+      go_assert(embeds == NULL);
       if (this->init_vars_from_call(til, type, *init->begin(), is_coloneq,
 				    location))
 	return;
@@ -1729,8 +1778,12 @@
     {
       if (init != NULL)
 	go_assert(pexpr != init->end());
-      this->init_var(*p, type, init == NULL ? NULL : *pexpr, is_coloneq,
-		     false, &any_new, vars, vals);
+      Named_object* no = this->init_var(*p, type,
+					init == NULL ? NULL : *pexpr,
+					is_coloneq, false, &any_new,
+					vars, vals);
+      if (embeds != NULL && no->is_variable())
+	no->var_value()->set_embeds(embeds);
       if (init != NULL)
 	++pexpr;
     }
@@ -2270,7 +2323,7 @@
 	}
     }
 
-  this->init_vars(&til, NULL, init, true, location);
+  this->init_vars(&til, NULL, init, true, NULL, location);
 }
 
 // FunctionDecl = "func" identifier Signature [ Block ] .
@@ -5317,7 +5370,7 @@
 	{
 	  go_error_at(this->location(),
                       "var declaration not allowed in for initializer");
-	  this->var_decl();
+	  this->var_decl(NULL);
 	}
 
       if (token->is_op(OPERATOR_SEMICOLON))
@@ -5762,13 +5815,13 @@
 {
   go_assert(this->peek_token()->is_keyword(KEYWORD_IMPORT));
   this->advance_token();
-  this->decl(&Parse::import_spec, NULL, 0);
+  this->decl(&Parse::import_spec, 0, NULL);
 }
 
 // ImportSpec = [ "." | PackageName ] PackageFileName .
 
 void
-Parse::import_spec(void*, unsigned int pragmas)
+Parse::import_spec(unsigned int pragmas, std::vector<std::string>*)
 {
   if (pragmas != 0)
     go_warning_at(this->location(), 0,
diff --git a/go/parse.h b/go/parse.h
index 4a5a4b8..2c3c505 100644
--- a/go/parse.h
+++ b/go/parse.h
@@ -181,16 +181,17 @@
   void method_spec(Typed_identifier_list*);
   void declaration();
   bool declaration_may_start_here();
-  void decl(void (Parse::*)(void*, unsigned int), void*, unsigned int pragmas);
-  void list(void (Parse::*)(void*, unsigned int), void*, bool);
+  void decl(void (Parse::*)(unsigned int, std::vector<std::string>*),
+	    unsigned int pragmas, std::vector<std::string>* embeds);
+  void list(void (Parse::*)(unsigned int, std::vector<std::string>*), bool);
   void const_decl();
   void const_spec(int, Type**, Expression_list**);
   void type_decl(unsigned int pragmas);
-  void type_spec(void*, unsigned int pragmas);
-  void var_decl();
-  void var_spec(void*, unsigned int pragmas);
+  void type_spec(unsigned int pragmas, std::vector<std::string>*);
+  void var_decl(std::vector<std::string>* embeds);
+  void var_spec(unsigned int pragmas, std::vector<std::string>*);
   void init_vars(const Typed_identifier_list*, Type*, Expression_list*,
-		 bool is_coloneq, Location);
+		 bool is_coloneq, std::vector<std::string>*, Location);
   bool init_vars_from_call(const Typed_identifier_list*, Type*, Expression*,
 			   bool is_coloneq, Location);
   bool init_vars_from_map(const Typed_identifier_list*, Type*, Expression*,
@@ -277,7 +278,7 @@
   void goto_stat();
   void package_clause();
   void import_decl();
-  void import_spec(void*, unsigned int pragmas);
+  void import_spec(unsigned int pragmas, std::vector<std::string>*);
 
   // Skip past an error looking for a semicolon or OP.  Return true if
   // all is well, false if we found EOF.