blob: 0584f707ce6bac47dce95e1b7c5a29bdf81688d5 [file] [log] [blame]
// embed.cc -- Go frontend go:embed handling.
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go-system.h"
#include "operator.h"
#include "go-diagnostics.h"
#include "lex.h"
#include "types.h"
#include "expressions.h"
#include "gogo.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
// Read the entire contents of FILENAME into *DATA.  Any error is
// reported at LOC.  Returns true on success, false on error.
//
// The file descriptor is closed on every path out of the function.
// Errors that print %m are reported before the descriptor is closed,
// so that close cannot change errno before the message is formatted.

static bool
read_file(const char* filename, Location loc, std::string* data)
{
  int fd = open(filename, O_RDONLY | O_BINARY);
  if (fd < 0)
    {
      go_error_at(loc, "%s: %m", filename);
      return false;
    }

  struct stat st;
  if (fstat(fd, &st) < 0)
    {
      go_error_at(loc, "%s: %m", filename);
      close(fd);
      return false;
    }
  off_t want = st.st_size;

  // Most files read here are going to be incorporated into the object file
  // and then the executable.  Set a limit on the size we will accept.
  if (want > 2000000000)
    {
      go_error_at(loc, "%s: file too large", filename);
      close(fd);
      return false;
    }

  data->resize(want);
  off_t got = 0;
  while (want > 0)
    {
      // C++11 requires that std::string use contiguous bytes, so this
      // is safe.
      ssize_t n = read(fd, &(*data)[got], want);
      if (n < 0)
        {
          go_error_at(loc, "%s: %m", filename);
          close(fd);
          return false;
        }
      if (n == 0)
        {
          // The file turned out to be shorter than fstat reported;
          // keep only the bytes actually read.
          data->resize(got);
          break;
        }
      got += n;
      want -= n;
    }

  close(fd);
  return true;
}
// A JSON value as read from an embedcfg file.  For our purposes a
// JSON value is a string, or a list of strings, or a mapping from
// strings to values.  We don't expect any numbers.  We also don't
// expect an array of anything other than strings; that is, we don't
// accept an array of general JSON values.
//
// A value starts out as JSON_VALUE_UNKNOWN and is given its final
// classification exactly once, by set_string, start_array, or
// start_map.  The Json_value pointers stored in a map value are
// deleted by the destructor.

class Json_value
{
 public:
  // The types of values.
  enum Json_value_classification
  {
    JSON_VALUE_UNKNOWN,
    JSON_VALUE_STRING,
    JSON_VALUE_ARRAY,
    JSON_VALUE_MAP
  };

  Json_value()
    : classification_(JSON_VALUE_UNKNOWN), string_(), array_(), map_()
  { }

  ~Json_value();

  // Return the kind of value this is.
  Json_value_classification
  classification() const
  { return this->classification_; }

  // Set to a string value.  The value must not have been classified
  // yet.
  void
  set_string(const std::string& str)
  {
    go_assert(this->classification_ == JSON_VALUE_UNKNOWN);
    this->classification_ = JSON_VALUE_STRING;
    this->string_ = str;
  }

  // Start an array value.  The value must not have been classified
  // yet.
  void
  start_array()
  {
    go_assert(this->classification_ == JSON_VALUE_UNKNOWN);
    this->classification_ = JSON_VALUE_ARRAY;
  }

  // Add an array entry.  Only valid for an array value.
  void
  add_array_entry(const std::string& s)
  {
    go_assert(this->classification_ == JSON_VALUE_ARRAY);
    this->array_.push_back(s);
  }

  // Start a map value.  The value must not have been classified yet.
  void
  start_map()
  {
    go_assert(this->classification_ == JSON_VALUE_UNKNOWN);
    this->classification_ = JSON_VALUE_MAP;
  }

  // Add a map entry.  Only valid for a map value.  VAL will be
  // deleted when this value is destroyed.  If KEY is already present
  // the new value replaces the old one, and the old one is deleted
  // here so that a duplicate key does not leak it.
  void
  add_map_entry(const std::string& key, Json_value* val)
  {
    go_assert(this->classification_ == JSON_VALUE_MAP);
    // operator[] gives NULL for a new key, and deleting NULL is a
    // no-op, so no separate lookup is needed.
    Json_value*& slot = this->map_[key];
    if (slot != val)
      delete slot;
    slot = val;
  }

  // Return the string from a string value.
  const std::string&
  to_string() const
  {
    go_assert(this->classification_ == JSON_VALUE_STRING);
    return this->string_;
  }

  // Fetch a vector of strings, and drop them from the JSON value.
  // This swaps *V with the stored array, so afterward this value
  // holds whatever *V held on entry.
  void
  get_and_clear_array(std::vector<std::string>* v)
  {
    go_assert(this->classification_ == JSON_VALUE_ARRAY);
    std::swap(*v, this->array_);
  }

  // Look up a map entry.  Returns NULL if not found.
  Json_value*
  lookup_map_entry(const std::string& key);

  // Iterate over a map.
  typedef Unordered_map(std::string, Json_value*)::iterator map_iterator;

  map_iterator
  map_begin()
  {
    go_assert(this->classification_ == JSON_VALUE_MAP);
    return this->map_.begin();
  }

  map_iterator
  map_end()
  { return this->map_.end(); }

 private:
  // Copying is not supported: the destructor deletes the values held
  // in the map, so a copy would delete them a second time.  These are
  // declared but intentionally not defined.
  Json_value(const Json_value&);
  Json_value& operator=(const Json_value&);

  // Classification.
  Json_value_classification classification_;
  // A string, for JSON_VALUE_STRING.
  std::string string_;
  // Array, for JSON_VALUE_ARRAY.
  std::vector<std::string> array_;
  // Mapping, for JSON_VALUE_MAP.
  Unordered_map(std::string, Json_value*) map_;
};
// Destroy a JSON value.  For a map value this deletes every
// Json_value stored in the map; other classifications need no
// explicit cleanup here.

Json_value::~Json_value()
{
  if (this->classification_ != JSON_VALUE_MAP)
    return;

  map_iterator entry = this->map_begin();
  const map_iterator last = this->map_end();
  while (entry != last)
    {
      delete entry->second;
      ++entry;
    }
}
// Find the value stored under KEY in a JSON map value.  Returns NULL
// when the map has no such key.  Asserts that this is a map value.

Json_value*
Json_value::lookup_map_entry(const std::string& key)
{
  go_assert(this->classification_ == JSON_VALUE_MAP);
  map_iterator found = this->map_.find(key);
  return found != this->map_.end() ? found->second : NULL;
}
// Reader for the embedcfg file.  It holds the file contents along
// with a cursor into them, and parses the restricted JSON subset
// used by the file: objects, arrays of strings, and strings.

class Embedcfg_reader
{
 public:
  Embedcfg_reader(const char* filename)
    : filename_(filename), data_(), p_(NULL), pend_(NULL)
  { }

  // Load the contents of the file named at construction.  Returns
  // whether it succeeded.
  bool
  initialize_from_file();

  // Parse a JSON object at the cursor into the given value.  Returns
  // whether it succeeded.
  bool
  read_object(Json_value*);

  // Report an error if anything other than whitespace remains.
  void
  check_eof();

  // Report the error MSG against the embedcfg file.
  void
  error(const char* msg);

 private:
  // Parse any supported JSON value.
  bool
  read_value(Json_value*);

  // Parse a JSON array of strings.
  bool
  read_array(Json_value*);

  // Parse a JSON string.
  bool
  read_string(std::string*);

  // Advance past whitespace.  The argument says whether reaching the
  // end of the data is acceptable.
  bool
  skip_whitespace(bool eof_ok);

  // Name of the embedcfg file.
  const char* filename_;
  // Contents of the file.
  std::string data_;
  // Cursor: the next character to process.
  const char* p_;
  // One past the last character of the data.
  const char* pend_;
};
// Read the embedcfg file.
void
Gogo::read_embedcfg(const char *filename)
{
class Embedcfg_reader r(filename);
if (!r.initialize_from_file())
return;
Json_value val;
if (!r.read_object(&val))
return;
r.check_eof();
if (val.classification() != Json_value::JSON_VALUE_MAP)
{
r.error("invalid embedcfg: not a JSON object");
return;
}
Json_value* patterns = val.lookup_map_entry("Patterns");
if (patterns == NULL)
{
r.error("invalid embedcfg: missing Patterns");
return;
}
if (patterns->classification() != Json_value::JSON_VALUE_MAP)
{
r.error("invalid embedcfg: Patterns is not a JSON object");
return;
}
Json_value* files = val.lookup_map_entry("Files");
if (files == NULL)
{
r.error("invalid embedcfg: missing Files");
return;
}
if (files->classification() != Json_value::JSON_VALUE_MAP)
{
r.error("invalid embedcfg: Files is not a JSON object");
return;
}
for (Json_value::map_iterator p = patterns->map_begin();
p != patterns->map_end();
++p)
{
if (p->second->classification() != Json_value::JSON_VALUE_ARRAY)
{
r.error("invalid embedcfg: Patterns entry is not an array");
return;
}
std::vector<std::string> files;
p->second->get_and_clear_array(&files);
std::pair<std::string, std::vector<std::string> > val;
val.first = p->first;
std::pair<Embed_patterns::iterator, bool> ins =
this->embed_patterns_.insert(val);
if (!ins.second)
{
r.error("invalid embedcfg: duplicate Patterns entry");
return;
}
std::swap(ins.first->second, files);
}
for (Json_value::map_iterator p = files->map_begin();
p != files->map_end();
++p)
{
if (p->second->classification() != Json_value::JSON_VALUE_STRING)
{
r.error("invalid embedcfg: Files entry is not a string");
return;
}
this->embed_files_[p->first] = p->second->to_string();
}
}
// Load the embedcfg file into this->data_ and set the cursor to its
// first byte.  An empty file is reported as an error.  Returns
// whether it succeeded.

bool
Embedcfg_reader::initialize_from_file()
{
  const bool loaded = read_file(this->filename_,
                                Linemap::unknown_location(),
                                &this->data_);
  if (!loaded)
    return false;

  if (this->data_.size() == 0)
    {
      this->error("empty file");
      return false;
    }

  const char* const start = this->data_.data();
  this->p_ = start;
  this->pend_ = start + this->data_.size();
  return true;
}
// Read a JSON object into VAL.  Return whether it succeeded.
//
// On entry the cursor is at, or at whitespace before, the opening
// brace.  On success the cursor is just past the closing brace and
// VAL is a map value holding one entry per member.  On failure an
// error has been reported and VAL may hold the members that were
// parsed before the problem was found.

bool
Embedcfg_reader::read_object(Json_value* val)
{
  if (!this->skip_whitespace(false))
    return false;
  if (*this->p_ != '{')
    {
      this->error("expected %<{%>");
      return false;
    }
  ++this->p_;

  val->start_map();

  if (!this->skip_whitespace(false))
    return false;
  // An empty object.
  if (*this->p_ == '}')
    {
      ++this->p_;
      return true;
    }

  while (true)
    {
      if (!this->skip_whitespace(false))
        return false;
      if (*this->p_ != '"')
        {
          this->error("expected %<\"%>");
          return false;
        }

      std::string key;
      if (!this->read_string(&key))
        return false;

      if (!this->skip_whitespace(false))
        return false;
      if (*this->p_ != ':')
        {
          this->error("expected %<:%>");
          return false;
        }
      ++this->p_;

      Json_value* subval = new Json_value();
      if (!this->read_value(subval))
        {
          // SUBVAL has not been stored in VAL yet, so nothing else
          // will delete it.
          delete subval;
          return false;
        }

      val->add_map_entry(key, subval);

      if (!this->skip_whitespace(false))
        return false;
      if (*this->p_ == '}')
        {
          ++this->p_;
          return true;
        }
      if (*this->p_ != ',')
        {
          this->error("expected %<,%> or %<}%>");
          return false;
        }
      ++this->p_;
    }
}
// Parse a JSON array of strings into VAL.  Returns whether it
// succeeded; on failure an error has been reported.  On success the
// cursor is just past the closing bracket.

bool
Embedcfg_reader::read_array(Json_value* val)
{
  if (!this->skip_whitespace(false))
    return false;
  if (*this->p_ != '[')
    {
      this->error("expected %<[%>");
      return false;
    }
  ++this->p_;

  val->start_array();

  if (!this->skip_whitespace(false))
    return false;

  // DONE becomes true once the cursor is on the closing bracket; an
  // empty array starts out that way.
  bool done = *this->p_ == ']';
  while (!done)
    {
      // Only strings are accepted as elements.  A full JSON parser
      // would read a general value here.
      std::string elem;
      if (!this->read_string(&elem))
        return false;
      val->add_array_entry(elem);

      if (!this->skip_whitespace(false))
        return false;

      const char sep = *this->p_;
      if (sep == ']')
        done = true;
      else if (sep == ',')
        ++this->p_;
      else
        {
          this->error("expected %<,%> or %<]%>");
          return false;
        }
    }

  // Step over the closing bracket.
  ++this->p_;
  return true;
}
// Parse whichever JSON value starts at the cursor into VAL, choosing
// by its first non-whitespace character: an object, an array, or a
// string.  Anything else is reported as a syntax error.  Returns
// whether it succeeded.

bool
Embedcfg_reader::read_value(Json_value* val)
{
  if (!this->skip_whitespace(false))
    return false;

  const char lead = *this->p_;
  if (lead == '{')
    return this->read_object(val);
  if (lead == '[')
    return this->read_array(val);
  if (lead != '"')
    {
      this->error("invalid JSON syntax");
      return false;
    }

  std::string text;
  if (!this->read_string(&text))
    return false;
  val->set_string(text);
  return true;
}
// Read a JSON string into *STR.  Return whether it succeeded.
//
// On entry the cursor is at, or at whitespace before, the opening
// quote.  On success the cursor is just past the closing quote and
// *STR holds the string with its escape sequences decoded.  On
// failure an error has been reported.

bool
Embedcfg_reader::read_string(std::string* str)
{
  if (!this->skip_whitespace(false))
    return false;
  if (*this->p_ != '"')
    {
      this->error("expected %<\"%>");
      return false;
    }
  ++this->p_;

  str->clear();
  while (this->p_ < this->pend_ && *this->p_ != '"')
    {
      // Ordinary characters are copied through unchanged.
      if (*this->p_ != '\\')
        {
          str->push_back(*this->p_);
          ++this->p_;
          continue;
        }

      // Skip the backslash; the next character selects the escape.
      ++this->p_;
      if (this->p_ >= this->pend_)
        {
          this->error("unterminated string");
          return false;
        }
      switch (*this->p_)
        {
        case '"': case '\\': case '/':
          str->push_back(*this->p_);
          ++this->p_;
          break;

        case 'b':
          str->push_back('\b');
          ++this->p_;
          break;

        case 'f':
          str->push_back('\f');
          ++this->p_;
          break;

        case 'n':
          str->push_back('\n');
          ++this->p_;
          break;

        case 'r':
          str->push_back('\r');
          ++this->p_;
          break;

        case 't':
          str->push_back('\t');
          ++this->p_;
          break;

        case 'u':
          {
            // Exactly four hex digits follow, giving a code point that
            // is appended via Lex::append_char.
            ++this->p_;
            unsigned int rune = 0;
            for (int i = 0; i < 4; i++)
              {
                if (this->p_ >= this->pend_)
                  {
                    this->error("unterminated string");
                    return false;
                  }
                unsigned char c = *this->p_;
                ++this->p_;
                rune <<= 4;
                if (c >= '0' && c <= '9')
                  rune += c - '0';
                else if (c >= 'A' && c <= 'F')
                  rune += c - 'A' + 10;
                else if (c >= 'a' && c <= 'f')
                  rune += c - 'a' + 10;
                else
                  {
                    this->error("invalid hex digit");
                    return false;
                  }
              }
            Lex::append_char(rune, false, str, Linemap::unknown_location());
          }
          break;

        default:
          this->error("unrecognized string escape");
          return false;
        }
    }

  // The loop stops either at the closing quote or at the end of the
  // data.  Test the bound explicitly rather than reading the byte at
  // PEND_.
  if (this->p_ >= this->pend_)
    {
      this->error("unterminated string");
      return false;
    }

  // Step over the closing quote.
  ++this->p_;
  return true;
}
// Verify that only whitespace remains in the data.  If anything else
// is left, report it as extraneous.

void
Embedcfg_reader::check_eof()
{
  const bool more = this->skip_whitespace(true);
  if (!more)
    return;
  this->error("extraneous data at end of file");
}
// Advance the cursor past spaces, tabs, newlines, and carriage
// returns.  Returns true if the cursor stops on some other
// character.  Returns false if the end of the data is reached; in
// that case an error is reported unless EOF_OK is true.

bool
Embedcfg_reader::skip_whitespace(bool eof_ok)
{
  for (; this->p_ < this->pend_; ++this->p_)
    {
      const char c = *this->p_;
      const bool blank = c == ' ' || c == '\t' || c == '\n' || c == '\r';
      if (!blank)
        return true;
    }

  if (!eof_ok)
    this->error("unexpected EOF");
  return false;
}
// Report the error MSG for the embedcfg file.  When data has been
// loaded and the cursor is set, the message also gives the cursor's
// byte offset within the data.

void
Embedcfg_reader::error(const char* msg)
{
  const bool have_offset = !this->data_.empty() && this->p_ != NULL;
  if (!have_offset)
    {
      go_error_at(Linemap::unknown_location(),
                  "%<-fgo-embedcfg%>: %s: %s",
                  this->filename_, msg);
      return;
    }

  const unsigned long offset =
    static_cast<unsigned long>(this->p_ - this->data_.data());
  go_error_at(Linemap::unknown_location(),
              "%<-fgo-embedcfg%>: %s: %lu: %s",
              this->filename_, offset, msg);
}
// Comparison functor giving the sort order for a list of embedded
// files, as discussed at the docs for embed.FS.  Paths are ordered
// first by directory part and then by final element.  A path with no
// directory part is compared as though its directory were ".".  One
// trailing slash is ignored when splitting a path.

class Embedfs_sort
{
 public:
  // Return whether P1 sorts before P2.
  bool
  operator()(const std::string& p1, const std::string& p2) const;

 private:
  // Compute the directory length, and the start and length of the
  // final element, of a path.
  void
  split(const std::string&, size_t*, size_t*, size_t*) const;
};

bool
Embedfs_sort::operator()(const std::string& p1, const std::string& p2) const
{
  size_t adir, aelem, aelemlen;
  size_t bdir, belem, belemlen;
  this->split(p1, &adir, &aelem, &aelemlen);
  this->split(p2, &bdir, &belem, &belemlen);

  // A directory length of zero stands for ".".
  const bool aroot = adir == 0;
  const bool broot = bdir == 0;

  if (!aroot && !broot)
    {
      const int c = p1.compare(0, adir, p2, 0, bdir);
      if (c != 0)
        return c < 0;
    }
  else if (!aroot)
    {
      // P2's directory is ".".
      const int c = p1.compare(0, adir, ".");
      if (c != 0)
        return c < 0;
    }
  else if (!broot)
    {
      // P1's directory is "."; the comparison below runs the other
      // way around, so the sense of the result is reversed.
      const int c = p2.compare(0, bdir, ".");
      if (c != 0)
        return c > 0;
    }

  // The directories are the same; the final element decides.
  return p1.compare(aelem, aelemlen, p2, belem, belemlen) < 0;
}

// Split S for comparison.  *DIRLEN is set to the length of the
// directory part (0 when no slash is found), *ELEM to the index where
// the final element starts, and *ELEMLEN to the final element's
// length, not counting a single trailing slash.

void
Embedfs_sort::split(const std::string& s, size_t* dirlen, size_t* elem,
                    size_t* elemlen) const
{
  size_t end = s.size();
  if (end > 0 && s[end - 1] == '/')
    end -= 1;

  const size_t slash = s.rfind('/', end - 1);
  const bool has_dir = slash != std::string::npos;

  *dirlen = has_dir ? slash : 0;
  *elem = has_dir ? slash + 1 : 0;
  *elemlen = end - *elem;
}
// Convert the go:embed directives for a variable into an initializer
// for that variable.
//
// TYPE is the type of the variable: a string, a slice of bytes, or
// embed.FS.  EMBEDS holds the patterns from the go:embed
// directive(s).  LOC is the location used for the resulting
// expressions and for any errors.
//
// Returns the initializer expression.  If a string or byte slice has
// no file or more than one file, if the type is not one of the
// accepted kinds, or if a file cannot be read, an error is reported
// and an error expression is returned.

Expression*
Gogo::initializer_for_embeds(Type* type,
                             const std::vector<std::string>* embeds,
                             Location loc)
{
  if (this->embed_patterns_.empty())
    {
      go_error_at(loc,
                  ("invalid go:embed: build system did not "
                   "supply embed configuration"));
      return Expression::make_error(loc);
    }

  type = type->unalias();

  enum {
    EMBED_STRING = 0,
    EMBED_BYTES = 1,
    EMBED_FS = 2
  } embed_kind;

  const Named_type* nt = type->named_type();
  if (nt != NULL
      && nt->named_object()->package() != NULL
      && nt->named_object()->package()->pkgpath() == "embed"
      && nt->name() == "FS")
    embed_kind = EMBED_FS;
  else if (type->is_string_type())
    embed_kind = EMBED_STRING;
  else if (type->is_slice_type()
           && type->array_type()->element_type()->integer_type() != NULL
           && type->array_type()->element_type()->integer_type()->is_byte())
    embed_kind = EMBED_BYTES;
  else
    {
      go_error_at(loc, "invalid type for go:embed");
      return Expression::make_error(loc);
    }

  // The patterns in the go:embed directive(s) are in EMBEDS.  Find
  // them in the patterns in the embedcfg file.
  Unordered_set(std::string) have;
  std::vector<std::string> paths;
  // Whether an unmapped pattern or file has already been reported.
  bool reported = false;
  for (std::vector<std::string>::const_iterator pe = embeds->begin();
       pe != embeds->end();
       pe++)
    {
      Embed_patterns::const_iterator pp = this->embed_patterns_.find(*pe);
      if (pp == this->embed_patterns_.end())
        {
          go_error_at(loc,
                      ("invalid go:embed: build system did not "
                       "map pattern %<%s%>"),
                      pe->c_str());
          reported = true;
          continue;
        }

      // Each pattern in the embedcfg file maps to a list of file
      // names.  Add those file names to PATHS.
      for (std::vector<std::string>::const_iterator pf = pp->second.begin();
           pf != pp->second.end();
           pf++)
        {
          if (this->embed_files_.find(*pf) == this->embed_files_.end())
            {
              go_error_at(loc,
                          ("invalid go:embed: build system did not "
                           "map file %<%s%>"),
                          pf->c_str());
              reported = true;
              continue;
            }

          std::pair<Unordered_set(std::string)::iterator, bool> ins
            = have.insert(*pf);
          if (ins.second)
            {
              const std::string& path(*pf);
              paths.push_back(path);

              if (embed_kind == EMBED_FS)
                {
                  // Add each required directory, with a trailing slash.
                  size_t i = std::string::npos;
                  while (i > 0)
                    {
                      i = path.rfind('/', i);
                      if (i == std::string::npos)
                        break;
                      std::string dir = path.substr(0, i + 1);
                      ins = have.insert(dir);
                      if (ins.second)
                        paths.push_back(dir);
                      // A slash at index 0 is the last possible
                      // directory.  Stop here: decrementing would wrap
                      // to npos and restart the search from the end.
                      if (i == 0)
                        break;
                      --i;
                    }
                }
            }
        }
    }

  if (embed_kind == EMBED_STRING || embed_kind == EMBED_BYTES)
    {
      if (paths.size() > 1)
        {
          go_error_at(loc,
                      ("invalid go:embed: multiple files for "
                       "string or byte slice"));
          return Expression::make_error(loc);
        }

      // There may be no file at all, either because every lookup above
      // failed or because the pattern maps to an empty list.  Do not
      // index PATHS in that case.
      if (paths.empty())
        {
          if (!reported)
            go_error_at(loc, "invalid go:embed: no matching files found");
          return Expression::make_error(loc);
        }

      std::string data;
      if (!read_file(this->embed_files_[paths[0]].c_str(), loc, &data))
        return Expression::make_error(loc);

      Expression* e = Expression::make_string(data, loc);
      if (embed_kind == EMBED_BYTES)
        e = Expression::make_cast(type, e, loc);
      return e;
    }

  // The remaining case is embed.FS.
  std::sort(paths.begin(), paths.end(), Embedfs_sort());

  // Check that embed.FS has the layout expected here: a struct with
  // one field, a pointer to a slice of structs that have name and data
  // fields.
  if (type->struct_type() == NULL
      || type->struct_type()->field_count() != 1)
    {
      go_error_at(loc,
                  ("internal error: embed.FS should be struct type "
                   "with one field"));
      return Expression::make_error(loc);
    }

  Type* ptr_type = type->struct_type()->field(0)->type();
  if (ptr_type->points_to() == NULL)
    {
      go_error_at(loc,
                  "internal error: embed.FS struct field should be pointer");
      return Expression::make_error(loc);
    }

  Type* slice_type = ptr_type->points_to();
  if (!slice_type->is_slice_type())
    {
      go_error_at(loc,
                  ("internal error: embed.FS struct field should be "
                   "pointer to slice"));
      return Expression::make_error(loc);
    }

  Type* file_type = slice_type->array_type()->element_type();
  if (file_type->struct_type() == NULL
      || (file_type->struct_type()->find_local_field(".embed.name", NULL)
          == NULL)
      || (file_type->struct_type()->find_local_field(".embed.data", NULL)
          == NULL))
    {
      go_error_at(loc,
                  ("internal error: embed.FS slice element should be struct "
                   "with name and data fields"));
      return Expression::make_error(loc);
    }

  const Struct_field_list* file_fields = file_type->struct_type()->fields();
  Expression_list* file_vals = new(Expression_list);
  file_vals->reserve(paths.size());

  for (std::vector<std::string>::const_iterator pp = paths.begin();
       pp != paths.end();
       ++pp)
    {
      // Entries ending in a slash are directories and have no data.
      // Test for an empty path first so that it is never indexed at
      // size() - 1.
      std::string data;
      if (pp->empty() || (*pp)[pp->size() - 1] != '/')
        {
          if (!read_file(this->embed_files_[*pp].c_str(), loc, &data))
            return Expression::make_error(loc);
        }

      Expression_list* field_vals = new(Expression_list);
      for (Struct_field_list::const_iterator pf = file_fields->begin();
           pf != file_fields->end();
           ++pf)
        {
          if (pf->is_field_name(".embed.name"))
            field_vals->push_back(Expression::make_string(*pp, loc));
          else if (pf->is_field_name(".embed.data"))
            field_vals->push_back(Expression::make_string(data, loc));
          else
            {
              // FIXME: The embed.file type has a hash field, which is
              // currently unused.  We should fill it in, but don't.
              // The hash is a SHA256, and we don't have convenient
              // SHA256 code.  Do this later when the field is
              // actually used.
              field_vals->push_back(NULL);
            }
        }

      Expression* file_val =
        Expression::make_struct_composite_literal(file_type, field_vals, loc);
      file_vals->push_back(file_val);
    }

  Expression* slice_init =
    Expression::make_slice_composite_literal(slice_type, file_vals, loc);
  Expression* fs_init = Expression::make_heap_expression(slice_init, loc);
  Expression_list* fs_vals = new Expression_list();
  fs_vals->push_back(fs_init);
  return Expression::make_struct_composite_literal(type, fs_vals, loc);
}