diff options
author | Julian Andres Klode <julian.klode@canonical.com> | 2019-08-14 14:38:26 +0200 |
---|---|---|
committer | Julian Andres Klode <julian.klode@canonical.com> | 2019-08-15 13:50:03 +0200 |
commit | 690ff4c3e44e7063ebde2557b7c0087ab720b894 (patch) | |
tree | 645cba5e6113b75bbd099f0517c97dac26e4eacb | |
parent | 7c724251fd8c24e89dc8cb813eee20aa0a4ad793 (diff) | |
download | apt-690ff4c3e44e7063ebde2557b7c0087ab720b894.tar.gz |
Add initial support for parsing patterns into parse trees
Introduce a parser for patterns that generates a parse tree. The
language understood by the parser is:
pattern = '?'TERM
| '?'TERM '(' pattern (',' pattern)* ','? ')'
| WORD
| QUOTED-WORD
TERM = [0-9a-zA-Z-]
WORD = [0-9a-ZA-Z-.*^$\[\]_\\]
QUOTED_WORD = "..." # you know what I mean
This language is context free, which is a massive simplification
from aptitude's language, where ?foo(bar) could have two different
meanings depending on whether ?foo takes an argument or not.
-rw-r--r-- | apt-pkg/cachefilter-patterns.cc | 205 | ||||
-rw-r--r-- | apt-pkg/cachefilter-patterns.h | 103 | ||||
-rw-r--r-- | test/libapt/pattern_test.cc | 95 |
3 files changed, 403 insertions, 0 deletions
diff --git a/apt-pkg/cachefilter-patterns.cc b/apt-pkg/cachefilter-patterns.cc new file mode 100644 index 000000000..3c958ebae --- /dev/null +++ b/apt-pkg/cachefilter-patterns.cc @@ -0,0 +1,205 @@ +/* + * cachefilter-patterns.cc - Parser for aptitude-style patterns + * + * Copyright (c) 2019 Canonical Ltd + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <apt-pkg/cachefilter-patterns.h> + +namespace APT +{ +namespace Internal +{ + +template <class... Args> +std::string rstrprintf(Args... args) +{ + std::string str; + strprintf(str, std::forward<Args>(args)...); + return str; +} + +// Parse a complete pattern, make sure it's the entire input +std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parseTop() +{ + skipSpace(); + auto node = parse(); + skipSpace(); + + if (node->end != sentence.size()) + { + Node node2; + + node2.start = node->end; + node2.end = sentence.size(); + throw Error{node2, "Expected end of file"}; + } + + return node; +} + +// Parse any pattern +std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parse() +{ + std::unique_ptr<Node> node; + if ((node = parsePattern()) != nullptr) + return node; + if ((node = parseQuotedWord()) != nullptr) + return node; + if ((node = parseWord()) != nullptr) + return node; + + Node eNode; + eNode.end = eNode.start = state.offset; + throw Error{eNode, "Expected pattern, quoted word, or word"}; +} + +// Parse a list pattern (or function call pattern) +std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parsePattern() +{ + static const APT::StringView CHARS("0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "-"); + if (sentence[state.offset] != '?') + return nullptr; + + auto node = std::make_unique<PatternNode>(); + node->end = node->start = state.offset; + state.offset++; + + while (CHARS.find(sentence[state.offset]) != APT::StringView::npos) + { + ++state.offset; + } + + node->term = sentence.substr(node->start, state.offset - node->start); + + node->end = skipSpace(); + // We don't have any arguments, return node; + if (sentence[state.offset] != '(') + return node; + node->end = ++state.offset; + skipSpace(); + + node->haveArgumentList = true; + + // Empty argument list, return + if (sentence[state.offset] == ')') + { + node->end = ++state.offset; + return node; + } + + node->arguments.push_back(parse()); + skipSpace(); + while (sentence[state.offset] == ',') + { + ++state.offset; + skipSpace(); + // This was a trailing comma - allow it and break the loop + if (sentence[state.offset] == ')') + break; + node->arguments.push_back(parse()); + skipSpace(); + } + + if (sentence[state.offset] != ')') + throw Error{*node, rstrprintf("Expected closing parenthesis, received %d", sentence[state.offset])}; + + node->end = ++state.offset; + return node; +} + +// Parse a quoted word atom +std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parseQuotedWord() +{ + if (sentence[state.offset] != '"') + return nullptr; + + auto node = std::make_unique<WordNode>(); + node->start = state.offset; + + // Eat beginning of string + state.offset++; + + while (sentence[state.offset] != '"' && sentence[state.offset] != '\0') + state.offset++; + + // End of string + if (sentence[state.offset] != '"') + throw Error{*node, "Could not find end of string"}; + state.offset++; + + node->end = state.offset; + node->word = sentence.substr(node->start + 1, node->end - node->start - 2); + + return node; +} + +// Parse a bare word atom +std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parseWord() +{ + static const APT::StringView CHARS("0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "-.*^$[]_\\"); + if (CHARS.find(sentence[state.offset]) == APT::StringView::npos) + return nullptr; + + auto node = std::make_unique<WordNode>(); + node->start = state.offset; + + while (CHARS.find(sentence[state.offset]) != APT::StringView::npos) + state.offset++; + + node->end = state.offset; + node->word = sentence.substr(node->start, node->end - node->start); + return node; +} + +// Rendering of the tree in JSON for debugging +std::ostream &PatternTreeParser::PatternNode::render(std::ostream &os) +{ + os << "{" + << "\"term\": \"" << term.to_string() << "\",\n" + << "\"arguments\": [\n"; + for (auto &node : arguments) + node->render(os) << "," << std::endl; + os << "null]\n"; + os << "}\n"; + return os; +} + +std::ostream &PatternTreeParser::WordNode::render(std::ostream &os) +{ + os << '"' << word.to_string() << '"'; + return os; +} + +std::nullptr_t PatternTreeParser::Node::error(std::string message) +{ + throw Error{*this, message}; +} + +bool PatternTreeParser::PatternNode::matches(APT::StringView name, int min, int max) +{ + if (name != term) + return false; + if (max != 0 && !haveArgumentList) + error(rstrprintf("%s expects an argument list", term.to_string().c_str())); + if (max == 0 && haveArgumentList) + error(rstrprintf("%s does not expect an argument list", term.to_string().c_str())); + if (min >= 0 && min == max && (arguments.size() != size_t(min))) + error(rstrprintf("%s expects %d arguments, but received %d arguments", term.to_string().c_str(), min, arguments.size())); + if (min >= 0 && arguments.size() < size_t(min)) + error(rstrprintf("%s expects at least %d arguments, but received %d arguments", term.to_string().c_str(), min, arguments.size())); + if (max >= 0 && arguments.size() > size_t(max)) + error(rstrprintf("%s expects at most %d arguments, but received %d arguments", term.to_string().c_str(), max, arguments.size())); + return true; +} + +} // namespace Internal +} // namespace APT diff --git a/apt-pkg/cachefilter-patterns.h b/apt-pkg/cachefilter-patterns.h new file mode 100644 index 000000000..dfbcd66a5 --- /dev/null +++ b/apt-pkg/cachefilter-patterns.h @@ -0,0 +1,103 @@ +/* + * cachefilter-patterns.h - Pattern parser and additional patterns as matchers + * + * Copyright (c) 2019 Canonical Ltd + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef APT_CACHEFILTER_PATTERNS_H +#define APT_CACHEFILTER_PATTERNS_H +#include <apt-pkg/cachefile.h> +#include <apt-pkg/cachefilter.h> +#include <apt-pkg/error.h> +#include <apt-pkg/string_view.h> +#include <apt-pkg/strutl.h> +#include <iostream> +#include <memory> +#include <sstream> +#include <string> +#include <vector> +#include <assert.h> +namespace APT +{ + +namespace Internal +{ +/** + * \brief PatternTreeParser parses the given sentence into a parse tree. + * + * The parse tree consists of nodes: + * - Word nodes which contains words or quoted words + * - Patterns, which represent ?foo and ?foo(...) patterns + */ +struct PatternTreeParser +{ + + struct Node + { + size_t start = 0; + size_t end = 0; + + virtual std::ostream &render(std::ostream &os) { return os; }; + std::nullptr_t error(std::string message); + }; + + struct Error : public std::exception + { + Node location; + std::string message; + + Error(Node location, std::string message) : location(location), message(message) {} + const char *what() const throw() override { return message.c_str(); } + }; + + struct PatternNode : public Node + { + APT::StringView term; + std::vector<std::unique_ptr<Node>> arguments; + bool haveArgumentList = false; + + std::ostream &render(std::ostream &stream) override; + bool matches(APT::StringView name, int min, int max); + }; + + struct WordNode : public Node + { + APT::StringView word; + bool quoted = false; + std::ostream &render(std::ostream &stream) override; + }; + + struct State + { + off_t offset = 0; + }; + + APT::StringView sentence; + State state; + + PatternTreeParser(APT::StringView sentence) : sentence(sentence){}; + off_t skipSpace() + { + while (sentence[state.offset] == ' ' || sentence[state.offset] == '\t' || sentence[state.offset] == '\r' || sentence[state.offset] == '\n') + state.offset++; + return state.offset; + }; + + /// \brief Parse a complete pattern + /// + /// There may not be anything before or after the pattern, except for + /// whitespace. + std::unique_ptr<Node> parseTop(); + + private: + std::unique_ptr<Node> parse(); + std::unique_ptr<Node> parsePattern(); + std::unique_ptr<Node> parseWord(); + std::unique_ptr<Node> parseQuotedWord(); +}; + +} // namespace Internal +} // namespace APT +#endif diff --git a/test/libapt/pattern_test.cc b/test/libapt/pattern_test.cc new file mode 100644 index 000000000..de2fbceb9 --- /dev/null +++ b/test/libapt/pattern_test.cc @@ -0,0 +1,95 @@ +/* + * cachefilter-patterns.h - Pattern parser and additional patterns as matchers + * + * Copyright (c) 2019 Canonical Ltd + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <config.h> +#include <apt-pkg/cachefilter-patterns.h> +#include <apt-pkg/cachefilter.h> + +#include <gtest/gtest.h> + +using namespace APT::Internal; + +TEST(TreeParserTest, ParseWord) +{ + auto node = PatternTreeParser("word").parseTop(); + auto wordNode = dynamic_cast<PatternTreeParser::WordNode *>(node.get()); + + EXPECT_EQ(node.get(), wordNode); + EXPECT_EQ(wordNode->word, "word"); +} + +TEST(TreeParserTest, ParseQuotedWord) +{ + auto node = PatternTreeParser("\"a word\"").parseTop(); + auto wordNode = dynamic_cast<PatternTreeParser::WordNode *>(node.get()); + + EXPECT_EQ(node.get(), wordNode); + EXPECT_EQ(wordNode->word, "a word"); +} + +TEST(TreeParserTest, ParsePattern) +{ + auto node = PatternTreeParser("?hello").parseTop(); + auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get()); + + EXPECT_EQ(node.get(), patternNode); + EXPECT_EQ(patternNode->term, "?hello"); + EXPECT_TRUE(patternNode->arguments.empty()); + EXPECT_FALSE(patternNode->haveArgumentList); +} + +TEST(TreeParserTest, ParseWithEmptyArgs) +{ + auto node = PatternTreeParser("?hello()").parseTop(); + auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get()); + + EXPECT_EQ(node.get(), patternNode); + EXPECT_EQ(patternNode->term, "?hello"); + EXPECT_TRUE(patternNode->arguments.empty()); + EXPECT_TRUE(patternNode->haveArgumentList); +} + +TEST(TreeParserTest, ParseWithOneArgs) +{ + auto node = PatternTreeParser("?hello(foo)").parseTop(); + auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get()); + + EXPECT_EQ(node.get(), patternNode); + EXPECT_EQ(patternNode->term, "?hello"); + EXPECT_EQ(1u, patternNode->arguments.size()); +} + +TEST(TreeParserTest, ParseWithManyArgs) +{ + auto node = PatternTreeParser("?hello(foo,bar)").parseTop(); + auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get()); + + EXPECT_EQ(node.get(), patternNode); + EXPECT_EQ(patternNode->term, "?hello"); + EXPECT_EQ(2u, patternNode->arguments.size()); +} + +TEST(TreeParserTest, ParseWithManyArgsWithSpaces) +{ + auto node = PatternTreeParser("?hello (foo, bar)").parseTop(); + auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get()); + + EXPECT_EQ(node.get(), patternNode); + EXPECT_EQ(patternNode->term, "?hello"); + EXPECT_EQ(2u, patternNode->arguments.size()); +} + +TEST(TreeParserTest, ParseWithManyArgsWithSpacesWithTrailingComma) +{ + auto node = PatternTreeParser("?hello (foo, bar,)").parseTop(); + auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get()); + + EXPECT_EQ(node.get(), patternNode); + EXPECT_EQ(patternNode->term, "?hello"); + EXPECT_EQ(2u, patternNode->arguments.size()); +} |