aboutsummaryrefslogtreecommitdiff
path: root/utils/text/templates.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'utils/text/templates.cpp')
-rw-r--r--utils/text/templates.cpp764
1 files changed, 764 insertions, 0 deletions
diff --git a/utils/text/templates.cpp b/utils/text/templates.cpp
new file mode 100644
index 000000000000..13cb27b1cce2
--- /dev/null
+++ b/utils/text/templates.cpp
@@ -0,0 +1,764 @@
+// Copyright 2012 The Kyua Authors.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "utils/text/templates.hpp"
+
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <stack>
+
+#include "utils/format/macros.hpp"
+#include "utils/fs/path.hpp"
+#include "utils/noncopyable.hpp"
+#include "utils/sanity.hpp"
+#include "utils/text/exceptions.hpp"
+#include "utils/text/operations.ipp"
+
+namespace text = utils::text;
+
+
+namespace {
+
+
+/// Parsed representation of a template statement.
+///
+/// A template statement is a line of the input file that is preceded by the
+/// statement marker.  This class holds the statement's type and its textual
+/// arguments, and knows how to build itself from the text that follows the
+/// marker.
+class statement_def {
+public:
+    /// Kinds of statements understood by the parser.
+    enum statement_type {
+        /// Alternative clause of a conditional.  Takes no arguments.
+        type_else,
+
+        /// End of conditional marker.  Takes no arguments.
+        type_endif,
+
+        /// End of loop marker.  Takes no arguments.
+        type_endloop,
+
+        /// Beginning of a conditional.
+        ///
+        /// Takes a single argument: the expression to check.
+        type_if,
+
+        /// Beginning of a loop over all the elements of a vector.
+        ///
+        /// Takes two arguments: the name of the vector to iterate over and
+        /// the name of the iterator used to index that vector.
+        type_loop,
+    };
+
+private:
+    /// Static properties of a statement type.
+    struct type_descriptor {
+        /// The native type of the statement.
+        statement_type type;
+
+        /// How many arguments the statement requires.
+        unsigned int n_arguments;
+
+        /// Constructs a new type descriptor.
+        ///
+        /// \param type_ The native type of the statement.
+        /// \param n_arguments_ The expected number of arguments.
+        type_descriptor(const statement_type type_,
+                        const unsigned int n_arguments_)
+            : type(type_), n_arguments(n_arguments_)
+        {
+        }
+    };
+
+    /// Lookup table from statement keyword to its descriptor.
+    typedef std::map< std::string, type_descriptor > types_map;
+
+    /// Table of all known statement types.
+    ///
+    /// Filled in by parse() the first time it runs and reused afterwards.
+    /// Unfortunately, we cannot perform this initialization in a static
+    /// manner without C++11.
+    static types_map _types;
+
+    /// Builds the table of known statement types.
+    ///
+    /// \return A new types definition map, to be assigned to _types.
+    static types_map
+    generate_types_map(void)
+    {
+        // Keep the argument counts below in sync with the enum comments.
+        types_map known;
+        known.insert(types_map::value_type(
+            "else", type_descriptor(type_else, 0)));
+        known.insert(types_map::value_type(
+            "endif", type_descriptor(type_endif, 0)));
+        known.insert(types_map::value_type(
+            "endloop", type_descriptor(type_endloop, 0)));
+        known.insert(types_map::value_type(
+            "if", type_descriptor(type_if, 1)));
+        known.insert(types_map::value_type(
+            "loop", type_descriptor(type_loop, 2)));
+        return known;
+    }
+
+public:
+    /// The type of the statement.
+    statement_type type;
+
+    /// The arguments to the statement, in textual form.
+    const std::vector< std::string > arguments;
+
+    /// Creates a new statement.
+    ///
+    /// In debug builds this validates that the number of arguments matches
+    /// what the registered descriptor for the type expects.
+    ///
+    /// \param type_ The type of the statement.
+    /// \param arguments_ The arguments to the statement.
+    statement_def(const statement_type& type_,
+                  const std::vector< std::string >& arguments_) :
+        type(type_), arguments(arguments_)
+    {
+#if !defined(NDEBUG)
+        // Locate the first descriptor registered for this type.
+        types_map::const_iterator entry = _types.begin();
+        while (entry != _types.end() && entry->second.type != type_)
+            ++entry;
+
+        if (entry == _types.end()) {
+            UNREACHABLE;
+        }
+        PRE(entry->second.n_arguments == arguments_.size());
+#endif
+    }
+
+    /// Parses a statement.
+    ///
+    /// \param line The textual representation of the statement without any
+    ///     prefix.
+    ///
+    /// \return The parsed statement.
+    ///
+    /// \throw text::syntax_error If the statement is empty, names an unknown
+    ///     keyword or carries the wrong number of arguments.
+    static statement_def
+    parse(const std::string& line)
+    {
+        // Lazily populate the table of known statements.
+        if (_types.empty())
+            _types = generate_types_map();
+
+        const std::vector< std::string > words = text::split(line, ' ');
+        if (words.empty())
+            throw text::syntax_error("Empty statement");
+
+        const std::string& keyword = words[0];
+        const types_map::const_iterator match = _types.find(keyword);
+        if (match == _types.end())
+            throw text::syntax_error(F("Unknown statement '%s'") % keyword);
+        const type_descriptor& descriptor = match->second;
+
+        // Everything after the keyword is an argument.
+        const std::vector< std::string > new_arguments(
+            words.begin() + 1, words.end());
+        if (new_arguments.size() != descriptor.n_arguments)
+            throw text::syntax_error(F("Invalid number of arguments for "
+                                       "statement '%s'") % keyword);
+
+        return statement_def(descriptor.type, new_arguments);
+    }
+};
+
+
+statement_def::types_map statement_def::_types;  // Starts empty; statement_def::parse() fills it on first use.
+
+
+/// Bookkeeping data for a loop that is currently being executed.
+///
+/// Keeps the parameters of a loop statement together with the place in the
+/// input where its body starts.
+struct loop_def {
+    /// Name of the vector being iterated over.
+    std::string vector;
+
+    /// Name of the iterator variable defined by the loop.
+    std::string iterator;
+
+    /// Position in the input to rewind to when looping.
+    ///
+    /// This points to the line after the loop statement, not to the loop
+    /// statement itself, so the statement does not have to be re-processed on
+    /// every iteration.
+    std::istream::pos_type position;
+
+    /// Constructs a new loop definition.
+    ///
+    /// \param vector_ The name of the vector (first argument).
+    /// \param iterator_ The name of the iterator (second argument).
+    /// \param position_ Position of the next line after the loop statement.
+    loop_def(const std::string& vector_,
+             const std::string& iterator_,
+             const std::istream::pos_type position_) :
+        vector(vector_),
+        iterator(iterator_),
+        position(position_)
+    {
+    }
+};
+
+
+/// Stateful class to instantiate the templates in an input stream.
+///
+/// The goal of this parser is to scan the input once and not buffer anything in
+/// memory.  The only exception are loops: loops are reinterpreted on every
+/// iteration from the same input file by rewinding the stream to the
+/// appropriate position.
+class templates_parser : utils::noncopyable {
+    /// The templates to apply.
+    ///
+    /// Note that this is not const because the parser has to have write access
+    /// to the templates.  In particular, it needs to be able to define the
+    /// iterators as regular variables.  This is a private copy of the
+    /// definitions given to the constructor.
+    text::templates_def _templates;
+
+    /// Prefix that marks a line as a statement.
+    const std::string _prefix;
+
+    /// Delimiter to surround an expression instantiation.
+    const std::string _delimiter;
+
+    /// Whether to skip incoming lines or not.
+    ///
+    /// The top of the stack is true whenever we encounter a conditional that
+    /// evaluates to false or a loop that does not have any iterations left.
+    /// Under these circumstances, we need to continue scanning the input stream
+    /// until we find the matching closing endif or endloop construct.
+    ///
+    /// This is a stack rather than a plain boolean to allow us deal with
+    /// if-else clauses.
+    std::stack< bool > _skip;
+
+    /// Current count of nested conditionals.
+    unsigned int _if_level;
+
+    /// Level of the top-most conditional that evaluated to false.
+    unsigned int _exit_if_level;
+
+    /// Current count of nested loops.
+    unsigned int _loop_level;
+
+    /// Level of the top-most loop that does not have any iterations left.
+    unsigned int _exit_loop_level;
+
+    /// Information about all the nested loops up to the current point.
+    std::stack< loop_def > _loops;
+
+    /// Checks if a line is a statement or not.
+    ///
+    /// \param line The line to validate.
+    ///
+    /// \return True if the line starts with the statement prefix and does not
+    /// start with the expression delimiter; false otherwise.
+    bool
+    is_statement(const std::string& line)
+    {
+        return ((line.length() >= _prefix.length() &&
+                 line.substr(0, _prefix.length()) == _prefix) &&
+                (line.length() < _delimiter.length() ||
+                 line.substr(0, _delimiter.length()) != _delimiter));
+    }
+
+    /// Parses a given statement line into a statement definition.
+    ///
+    /// \param line The line to validate; it must be a valid statement.
+    ///
+    /// \return The parsed statement.
+    ///
+    /// \throw text::syntax_error If the input is not a valid statement.
+    statement_def
+    parse_statement(const std::string& line)
+    {
+        PRE(is_statement(line));
+        return statement_def::parse(line.substr(_prefix.length()));
+    }
+
+    /// Processes a line from the input when not in skip mode.
+    ///
+    /// Plain lines are copied to the output followed by a newline.  Statement
+    /// lines update the conditional and loop state of the parser.
+    ///
+    /// \param line The line to be processed.
+    /// \param input The input stream from which the line was read.  The current
+    ///     position in the stream must be after the line being processed.
+    /// \param output The output stream into which to write the results.
+    ///
+    /// \throw text::syntax_error If the input is not valid.
+    void
+    handle_normal(const std::string& line, std::istream& input,
+                  std::ostream& output)
+    {
+        if (!is_statement(line)) {
+            // Fast path.  Mostly to avoid an indentation level for the big
+            // chunk of code below.
+            output << line << '\n';
+            return;
+        }
+
+        const statement_def statement = parse_statement(line);
+
+        switch (statement.type) {
+        case statement_def::type_else:
+            _skip.top() = !_skip.top();
+            break;
+
+        case statement_def::type_endif:
+            _if_level--;
+            break;
+
+        case statement_def::type_endloop: {
+            PRE(_loops.size() == _loop_level);
+            const loop_def& loop = _loops.top();
+
+            const std::size_t next_index = 1 + text::to_type< std::size_t >(
+                _templates.get_variable(loop.iterator));
+
+            if (next_index < _templates.get_vector(loop.vector).size()) {
+                // More elements left: advance the iterator and replay the
+                // body of the loop from its first line.
+                _templates.add_variable(loop.iterator, F("%s") % next_index);
+                input.seekg(loop.position);
+            } else {
+                // 'loop' refers to the element at the top of _loops, so the
+                // iterator has to be unset before that element is popped;
+                // using 'loop' after the pop would read a destroyed object.
+                _loop_level--;
+                _templates.remove_variable(loop.iterator);
+                _loops.pop();
+            }
+        } break;
+
+        case statement_def::type_if: {
+            _if_level++;
+            const std::string value = _templates.evaluate(
+                statement.arguments[0]);
+            if (value.empty() || value == "0" || value == "false") {
+                _exit_if_level = _if_level;
+                _skip.push(true);
+            } else {
+                _skip.push(false);
+            }
+        } break;
+
+        case statement_def::type_loop: {
+            _loop_level++;
+
+            // tellg() points past the loop statement, i.e. at the first line
+            // of the body.
+            const loop_def loop(statement.arguments[0], statement.arguments[1],
+                                input.tellg());
+            if (_templates.get_vector(loop.vector).empty()) {
+                _exit_loop_level = _loop_level;
+                _skip.push(true);
+            } else {
+                _templates.add_variable(loop.iterator, "0");
+                _loops.push(loop);
+                _skip.push(false);
+            }
+        } break;
+        }
+    }
+
+    /// Processes a line from the input when in skip mode.
+    ///
+    /// Only statements are looked at, to keep track of nesting and to detect
+    /// the construct that ends the skipped region; other lines are dropped.
+    ///
+    /// \param line The line to be processed.
+    ///
+    /// \throw text::syntax_error If the input is not valid.
+    void
+    handle_skip(const std::string& line)
+    {
+        PRE(_skip.top());
+
+        if (!is_statement(line))
+            return;
+
+        const statement_def statement = parse_statement(line);
+        switch (statement.type) {
+        case statement_def::type_else:
+            if (_exit_if_level == _if_level)
+                _skip.top() = !_skip.top();
+            break;
+
+        case statement_def::type_endif:
+            INV(_if_level >= _exit_if_level);
+            if (_if_level == _exit_if_level)
+                _skip.top() = false;
+            _if_level--;
+            _skip.pop();
+            break;
+
+        case statement_def::type_endloop:
+            INV(_loop_level >= _exit_loop_level);
+            if (_loop_level == _exit_loop_level)
+                _skip.top() = false;
+            _loop_level--;
+            _skip.pop();
+            break;
+
+        case statement_def::type_if:
+            _if_level++;
+            _skip.push(true);
+            break;
+
+        case statement_def::type_loop:
+            _loop_level++;
+            _skip.push(true);
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    /// Evaluates expressions on a given input line.
+    ///
+    /// An expression is surrounded by _delimiter on both sides.  We scan the
+    /// string from left to right finding any expressions that may appear, yank
+    /// them out and call templates_def::evaluate() to get their value.
+    ///
+    /// Lonely or unbalanced appearances of _delimiter on the input line are
+    /// not considered an error, given that the user may actually want to supply
+    /// that character sequence without being interpreted as a template.
+    ///
+    /// \param in_line The input line from which to evaluate expressions.
+    ///
+    /// \return The evaluated line.
+    ///
+    /// \throw text::syntax_error If the expressions in the line are malformed.
+    std::string
+    evaluate(const std::string& in_line)
+    {
+        std::string out_line;
+
+        std::string::size_type last_pos = 0;
+        while (last_pos != std::string::npos) {
+            const std::string::size_type open_pos = in_line.find(
+                _delimiter, last_pos);
+            if (open_pos == std::string::npos) {
+                // No more delimiters: copy the remainder verbatim.
+                out_line += in_line.substr(last_pos);
+                last_pos = std::string::npos;
+            } else {
+                const std::string::size_type close_pos = in_line.find(
+                    _delimiter, open_pos + _delimiter.length());
+                if (close_pos == std::string::npos) {
+                    // Unbalanced delimiter: keep it as literal text.
+                    out_line += in_line.substr(last_pos);
+                    last_pos = std::string::npos;
+                } else {
+                    out_line += in_line.substr(last_pos, open_pos - last_pos);
+                    out_line += _templates.evaluate(in_line.substr(
+                        open_pos + _delimiter.length(),
+                        close_pos - open_pos - _delimiter.length()));
+                    last_pos = close_pos + _delimiter.length();
+                }
+            }
+        }
+
+        return out_line;
+    }
+
+public:
+    /// Constructs a new template parser.
+    ///
+    /// \param templates_ The templates to apply to the processed file.
+    /// \param prefix_ The prefix that identifies lines as statements.
+    /// \param delimiter_ Delimiter to surround a variable instantiation.
+    templates_parser(const text::templates_def& templates_,
+                     const std::string& prefix_,
+                     const std::string& delimiter_) :
+        _templates(templates_),
+        _prefix(prefix_),
+        _delimiter(delimiter_),
+        _if_level(0),
+        _exit_if_level(0),
+        _loop_level(0),
+        _exit_loop_level(0)
+    {
+    }
+
+    /// Applies the templates to a given input.
+    ///
+    /// Lines are read one at a time.  While in skip mode they only go through
+    /// handle_skip(); otherwise their expressions are evaluated and the result
+    /// is passed on to handle_normal().
+    ///
+    /// \param input The stream to which to apply the templates.
+    /// \param output The stream into which to write the results.
+    ///
+    /// \throw text::syntax_error If the input is not valid.  Note that the
+    ///     output is not guaranteed to be unmodified on exit if an error is
+    ///     encountered.
+    void
+    instantiate(std::istream& input, std::ostream& output)
+    {
+        std::string line;
+        // NOTE: good() is false once getline() hits the end of the input, so a
+        // final line that lacks a terminating newline is not processed.
+        while (std::getline(input, line).good()) {
+            if (!_skip.empty() && _skip.top())
+                handle_skip(line);
+            else
+                handle_normal(evaluate(line), input, output);
+        }
+    }
+};
+
+
+} // anonymous namespace
+
+
+/// Creates a templates definition that holds no variables and no vectors.
+text::templates_def::templates_def()
+{
+}
+
+
+/// Defines or redefines a string variable.
+///
+/// An existing variable with the same name gets its value overwritten.  This
+/// is needed to implement iterators; client code should really not be
+/// redefining variables.
+///
+/// \pre The name must not already be in use by a vector.
+///
+/// \param name The name of the variable to set.
+/// \param value The value to set the given variable to.
+void
+text::templates_def::add_variable(const std::string& name,
+                                  const std::string& value)
+{
+    PRE(_vectors.count(name) == 0);
+    _variables[name] = value;
+}
+
+
+/// Removes a string variable from the templates.
+///
+/// Client code has no reason to use this.  It only exists so that loop
+/// iterators can be properly scoped.
+///
+/// \pre The variable must exist.
+///
+/// \param name The name of the variable to remove from the templates.
+void
+text::templates_def::remove_variable(const std::string& name)
+{
+    const variables_map::iterator victim = _variables.find(name);
+    PRE(victim != _variables.end());
+    _variables.erase(victim);
+}
+
+
+/// Defines a new, empty vector.
+///
+/// A vector that already exists under the same name is replaced by an empty
+/// one.  Client code should really not be redefining vectors.
+///
+/// \pre The name must not already be in use by a variable.
+///
+/// \param name The name of the vector to set.
+void
+text::templates_def::add_vector(const std::string& name)
+{
+    PRE(_variables.count(name) == 0);
+    _vectors[name] = strings_vector();
+}
+
+
+/// Appends a value to a previously defined vector.
+///
+/// \pre The vector must exist and its name must not be in use by a variable.
+///
+/// \param name The name of the vector to append the value to.
+/// \param value The textual value to append to the vector.
+void
+text::templates_def::add_to_vector(const std::string& name,
+                                   const std::string& value)
+{
+    PRE(_variables.count(name) == 0);
+    PRE(_vectors.count(name) != 0);
+    _vectors[name].push_back(value);
+}
+
+
+/// Tells whether an identifier names a variable or a vector.
+///
+/// This backs the "defined" check performed when evaluating expressions.
+///
+/// \param name The name of the variable or vector.
+///
+/// \return True if the given name exists as a variable or a vector; false
+/// otherwise.
+bool
+text::templates_def::exists(const std::string& name) const
+{
+    if (_variables.find(name) != _variables.end())
+        return true;
+    return _vectors.find(name) != _vectors.end();
+}
+
+
+/// Looks up the value of a variable.
+///
+/// \param name The name of the variable.
+///
+/// \return A reference to the stored value of the requested variable.
+///
+/// \throw text::syntax_error If the variable does not exist.
+const std::string&
+text::templates_def::get_variable(const std::string& name) const
+{
+    const variables_map::const_iterator found = _variables.find(name);
+    if (found != _variables.end())
+        return found->second;
+
+    throw text::syntax_error(F("Unknown variable '%s'") % name);
+}
+
+
+/// Looks up a vector by name.
+///
+/// \param name The name of the vector.
+///
+/// \return A reference to the requested vector.
+///
+/// \throw text::syntax_error If the vector does not exist.
+const text::templates_def::strings_vector&
+text::templates_def::get_vector(const std::string& name) const
+{
+    const vectors_map::const_iterator found = _vectors.find(name);
+    if (found != _vectors.end())
+        return found->second;
+
+    throw text::syntax_error(F("Unknown vector '%s'") % name);
+}
+
+
+/// Fetches one element of a vector, indexed through a variable.
+///
+/// \param name The name of the vector to index.
+/// \param index_name The name of a variable representing the index to use.
+///     This must be convertible to a natural.
+///
+/// \return The value of the vector at the given index.
+///
+/// \throw text::syntax_error If the vector or the index variable do not exist
+///     or if the index is out of range.
+const std::string&
+text::templates_def::get_vector(const std::string& name,
+                                const std::string& index_name) const
+{
+    // The vector is resolved before the index variable, so an unknown vector
+    // is reported ahead of any problem with the index.
+    const strings_vector& values = get_vector(name);
+    const std::string& raw_index = get_variable(index_name);
+
+    std::size_t position;
+    try {
+        position = text::to_type< std::size_t >(raw_index);
+    } catch (const text::syntax_error&) {
+        throw text::syntax_error(F("Index '%s' not an integer, value '%s'") %
+                                 index_name % raw_index);
+    }
+
+    if (position < values.size())
+        return values[position];
+
+    throw text::syntax_error(F("Index '%s' out of range at position '%s'") %
+                             index_name % position);
+}
+
+
+/// Evaluates an expression against these templates.
+///
+/// An expression is a query on the current templates to fetch a particular
+/// value.  Three forms are recognized: a plain name yields the value of that
+/// variable; "defined(name)" yields "true" or "false"; "length(name)" yields
+/// the size of a vector; and any other "name(index)" yields the element of
+/// the vector "name" at the position held by the variable "index".
+///
+/// \param expression The expression to evaluate.  This should not include any
+///     of the delimiters used in the user input, as otherwise the expression
+///     will not be evaluated properly.
+///
+/// \return The result of the expression evaluation as a string.
+///
+/// \throw text::syntax_error If there is any problem while evaluating the
+///     expression.
+std::string
+text::templates_def::evaluate(const std::string& expression) const
+{
+    const std::string::size_type open = expression.find('(');
+    if (open == std::string::npos)
+        return get_variable(expression);
+
+    const std::string::size_type close = expression.find(')', open);
+    if (close == std::string::npos)
+        throw text::syntax_error(F("Expected ')' in expression '%s')") %
+                                 expression);
+    // The closing parenthesis must be the very last character.
+    if (close + 1 != expression.length())
+        throw text::syntax_error(F("Unexpected text found after ')' in "
+                                   "expression '%s'") % expression);
+
+    const std::string function = expression.substr(0, open);
+    const std::string argument = expression.substr(
+        open + 1, close - open - 1);
+
+    if (function == "defined")
+        return exists(argument) ? "true" : "false";
+    if (function == "length")
+        return F("%s") % get_vector(argument).size();
+    return get_vector(function, argument);
+}
+
+
+/// Instantiates the templates found in an input stream.
+///
+/// Statements are introduced by "%" and expressions are enclosed in "%%".
+///
+/// \param templates The templates to use.
+/// \param input The input to process.
+/// \param output The stream to which to write the processed text.
+///
+/// \throw text::syntax_error If there is any problem processing the input.
+void
+text::instantiate(const templates_def& templates,
+                  std::istream& input, std::ostream& output)
+{
+    const std::string statement_prefix("%");
+    const std::string expression_delimiter("%%");
+
+    templates_parser parser(templates, statement_prefix, expression_delimiter);
+    parser.instantiate(input, output);
+}
+
+
+/// Instantiates the templates of an input file into an output file.
+///
+/// The input file is opened before the output file, so a failure to open the
+/// former is reported first.
+///
+/// \param templates The templates to use.
+/// \param input_file The path to the input to process.
+/// \param output_file The path to the file into which to write the output.
+///
+/// \throw text::error If the input or output files cannot be opened.
+/// \throw text::syntax_error If there is any problem processing the input.
+void
+text::instantiate(const templates_def& templates,
+                  const fs::path& input_file, const fs::path& output_file)
+{
+    std::ifstream source(input_file.c_str());
+    if (source.fail())
+        throw text::error(F("Failed to open %s for read") % input_file);
+
+    std::ofstream sink(output_file.c_str());
+    if (sink.fail())
+        throw text::error(F("Failed to open %s for write") % output_file);
+
+    instantiate(templates, source, sink);
+}