summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Andres Klode <julian.klode@canonical.com>2019-08-14 14:38:26 +0200
committerJulian Andres Klode <julian.klode@canonical.com>2019-08-15 13:50:03 +0200
commit690ff4c3e44e7063ebde2557b7c0087ab720b894 (patch)
tree645cba5e6113b75bbd099f0517c97dac26e4eacb
parent7c724251fd8c24e89dc8cb813eee20aa0a4ad793 (diff)
downloadapt-690ff4c3e44e7063ebde2557b7c0087ab720b894.tar.gz
Add initial support for parsing patterns into parse trees
Introduce a parser for patterns that generates a parse tree. The language understood by the parser is: pattern = '?'TERM | '?'TERM '(' pattern (',' pattern)* ','? ')' | WORD | QUOTED-WORD TERM = [0-9a-zA-Z-] WORD = [0-9a-ZA-Z-.*^$\[\]_\\] QUOTED_WORD = "..." # you know what I mean This language is context free, which is a massive simplification from aptitude's language, where ?foo(bar) could have two different meanings depending on whether ?foo takes an argument or not.
-rw-r--r--apt-pkg/cachefilter-patterns.cc205
-rw-r--r--apt-pkg/cachefilter-patterns.h103
-rw-r--r--test/libapt/pattern_test.cc95
3 files changed, 403 insertions, 0 deletions
diff --git a/apt-pkg/cachefilter-patterns.cc b/apt-pkg/cachefilter-patterns.cc
new file mode 100644
index 000000000..3c958ebae
--- /dev/null
+++ b/apt-pkg/cachefilter-patterns.cc
@@ -0,0 +1,205 @@
+/*
+ * cachefilter-patterns.cc - Parser for aptitude-style patterns
+ *
+ * Copyright (c) 2019 Canonical Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#include <apt-pkg/cachefilter-patterns.h>
+
+namespace APT
+{
+namespace Internal
+{
+
+template <class... Args>
+std::string rstrprintf(Args... args)
+{
+ std::string str;
+ strprintf(str, std::forward<Args>(args)...);
+ return str;
+}
+
+// Parse a complete pattern, make sure it's the entire input
+std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parseTop()
+{
+ skipSpace();
+ auto node = parse();
+ skipSpace();
+
+ if (node->end != sentence.size())
+ {
+ Node node2;
+
+ node2.start = node->end;
+ node2.end = sentence.size();
+ throw Error{node2, "Expected end of file"};
+ }
+
+ return node;
+}
+
+// Parse any pattern
+std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parse()
+{
+ std::unique_ptr<Node> node;
+ if ((node = parsePattern()) != nullptr)
+ return node;
+ if ((node = parseQuotedWord()) != nullptr)
+ return node;
+ if ((node = parseWord()) != nullptr)
+ return node;
+
+ Node eNode;
+ eNode.end = eNode.start = state.offset;
+ throw Error{eNode, "Expected pattern, quoted word, or word"};
+}
+
+// Parse a list pattern (or function call pattern)
+std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parsePattern()
+{
+ static const APT::StringView CHARS("0123456789"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "-");
+ if (sentence[state.offset] != '?')
+ return nullptr;
+
+ auto node = std::make_unique<PatternNode>();
+ node->end = node->start = state.offset;
+ state.offset++;
+
+ while (CHARS.find(sentence[state.offset]) != APT::StringView::npos)
+ {
+ ++state.offset;
+ }
+
+ node->term = sentence.substr(node->start, state.offset - node->start);
+
+ node->end = skipSpace();
+ // We don't have any arguments, return node;
+ if (sentence[state.offset] != '(')
+ return node;
+ node->end = ++state.offset;
+ skipSpace();
+
+ node->haveArgumentList = true;
+
+ // Empty argument list, return
+ if (sentence[state.offset] == ')')
+ {
+ node->end = ++state.offset;
+ return node;
+ }
+
+ node->arguments.push_back(parse());
+ skipSpace();
+ while (sentence[state.offset] == ',')
+ {
+ ++state.offset;
+ skipSpace();
+ // This was a trailing comma - allow it and break the loop
+ if (sentence[state.offset] == ')')
+ break;
+ node->arguments.push_back(parse());
+ skipSpace();
+ }
+
+ if (sentence[state.offset] != ')')
+ throw Error{*node, rstrprintf("Expected closing parenthesis, received %d", sentence[state.offset])};
+
+ node->end = ++state.offset;
+ return node;
+}
+
+// Parse a quoted word atom
+std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parseQuotedWord()
+{
+ if (sentence[state.offset] != '"')
+ return nullptr;
+
+ auto node = std::make_unique<WordNode>();
+ node->start = state.offset;
+
+ // Eat beginning of string
+ state.offset++;
+
+ while (sentence[state.offset] != '"' && sentence[state.offset] != '\0')
+ state.offset++;
+
+ // End of string
+ if (sentence[state.offset] != '"')
+ throw Error{*node, "Could not find end of string"};
+ state.offset++;
+
+ node->end = state.offset;
+ node->word = sentence.substr(node->start + 1, node->end - node->start - 2);
+
+ return node;
+}
+
+// Parse a bare word atom
+std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parseWord()
+{
+ static const APT::StringView CHARS("0123456789"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "-.*^$[]_\\");
+ if (CHARS.find(sentence[state.offset]) == APT::StringView::npos)
+ return nullptr;
+
+ auto node = std::make_unique<WordNode>();
+ node->start = state.offset;
+
+ while (CHARS.find(sentence[state.offset]) != APT::StringView::npos)
+ state.offset++;
+
+ node->end = state.offset;
+ node->word = sentence.substr(node->start, node->end - node->start);
+ return node;
+}
+
+// Rendering of the tree in JSON for debugging
+std::ostream &PatternTreeParser::PatternNode::render(std::ostream &os)
+{
+ os << "{"
+ << "\"term\": \"" << term.to_string() << "\",\n"
+ << "\"arguments\": [\n";
+ for (auto &node : arguments)
+ node->render(os) << "," << std::endl;
+ os << "null]\n";
+ os << "}\n";
+ return os;
+}
+
+std::ostream &PatternTreeParser::WordNode::render(std::ostream &os)
+{
+ os << '"' << word.to_string() << '"';
+ return os;
+}
+
+std::nullptr_t PatternTreeParser::Node::error(std::string message)
+{
+ throw Error{*this, message};
+}
+
+bool PatternTreeParser::PatternNode::matches(APT::StringView name, int min, int max)
+{
+ if (name != term)
+ return false;
+ if (max != 0 && !haveArgumentList)
+ error(rstrprintf("%s expects an argument list", term.to_string().c_str()));
+ if (max == 0 && haveArgumentList)
+ error(rstrprintf("%s does not expect an argument list", term.to_string().c_str()));
+ if (min >= 0 && min == max && (arguments.size() != size_t(min)))
+ error(rstrprintf("%s expects %d arguments, but received %d arguments", term.to_string().c_str(), min, arguments.size()));
+ if (min >= 0 && arguments.size() < size_t(min))
+ error(rstrprintf("%s expects at least %d arguments, but received %d arguments", term.to_string().c_str(), min, arguments.size()));
+ if (max >= 0 && arguments.size() > size_t(max))
+ error(rstrprintf("%s expects at most %d arguments, but received %d arguments", term.to_string().c_str(), max, arguments.size()));
+ return true;
+}
+
+} // namespace Internal
+} // namespace APT
diff --git a/apt-pkg/cachefilter-patterns.h b/apt-pkg/cachefilter-patterns.h
new file mode 100644
index 000000000..dfbcd66a5
--- /dev/null
+++ b/apt-pkg/cachefilter-patterns.h
@@ -0,0 +1,103 @@
+/*
+ * cachefilter-patterns.h - Pattern parser and additional patterns as matchers
+ *
+ * Copyright (c) 2019 Canonical Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#ifndef APT_CACHEFILTER_PATTERNS_H
+#define APT_CACHEFILTER_PATTERNS_H
+#include <apt-pkg/cachefile.h>
+#include <apt-pkg/cachefilter.h>
+#include <apt-pkg/error.h>
+#include <apt-pkg/string_view.h>
+#include <apt-pkg/strutl.h>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <assert.h>
+namespace APT
+{
+
+namespace Internal
+{
+/**
+ * \brief PatternTreeParser parses the given sentence into a parse tree.
+ *
+ * The parse tree consists of nodes:
+ * - Word nodes which contains words or quoted words
+ * - Patterns, which represent ?foo and ?foo(...) patterns
+ */
+struct PatternTreeParser
+{
+
+ struct Node
+ {
+ size_t start = 0;
+ size_t end = 0;
+
+ virtual std::ostream &render(std::ostream &os) { return os; };
+ std::nullptr_t error(std::string message);
+ };
+
+ struct Error : public std::exception
+ {
+ Node location;
+ std::string message;
+
+ Error(Node location, std::string message) : location(location), message(message) {}
+ const char *what() const throw() override { return message.c_str(); }
+ };
+
+ struct PatternNode : public Node
+ {
+ APT::StringView term;
+ std::vector<std::unique_ptr<Node>> arguments;
+ bool haveArgumentList = false;
+
+ std::ostream &render(std::ostream &stream) override;
+ bool matches(APT::StringView name, int min, int max);
+ };
+
+ struct WordNode : public Node
+ {
+ APT::StringView word;
+ bool quoted = false;
+ std::ostream &render(std::ostream &stream) override;
+ };
+
+ struct State
+ {
+ off_t offset = 0;
+ };
+
+ APT::StringView sentence;
+ State state;
+
+ PatternTreeParser(APT::StringView sentence) : sentence(sentence){};
+ off_t skipSpace()
+ {
+ while (sentence[state.offset] == ' ' || sentence[state.offset] == '\t' || sentence[state.offset] == '\r' || sentence[state.offset] == '\n')
+ state.offset++;
+ return state.offset;
+ };
+
+ /// \brief Parse a complete pattern
+ ///
+ /// There may not be anything before or after the pattern, except for
+ /// whitespace.
+ std::unique_ptr<Node> parseTop();
+
+ private:
+ std::unique_ptr<Node> parse();
+ std::unique_ptr<Node> parsePattern();
+ std::unique_ptr<Node> parseWord();
+ std::unique_ptr<Node> parseQuotedWord();
+};
+
+} // namespace Internal
+} // namespace APT
+#endif
diff --git a/test/libapt/pattern_test.cc b/test/libapt/pattern_test.cc
new file mode 100644
index 000000000..de2fbceb9
--- /dev/null
+++ b/test/libapt/pattern_test.cc
@@ -0,0 +1,95 @@
+/*
+ * cachefilter-patterns.h - Pattern parser and additional patterns as matchers
+ *
+ * Copyright (c) 2019 Canonical Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#include <config.h>
+#include <apt-pkg/cachefilter-patterns.h>
+#include <apt-pkg/cachefilter.h>
+
+#include <gtest/gtest.h>
+
+using namespace APT::Internal;
+
+TEST(TreeParserTest, ParseWord)
+{
+ auto node = PatternTreeParser("word").parseTop();
+ auto wordNode = dynamic_cast<PatternTreeParser::WordNode *>(node.get());
+
+ EXPECT_EQ(node.get(), wordNode);
+ EXPECT_EQ(wordNode->word, "word");
+}
+
+TEST(TreeParserTest, ParseQuotedWord)
+{
+ auto node = PatternTreeParser("\"a word\"").parseTop();
+ auto wordNode = dynamic_cast<PatternTreeParser::WordNode *>(node.get());
+
+ EXPECT_EQ(node.get(), wordNode);
+ EXPECT_EQ(wordNode->word, "a word");
+}
+
+TEST(TreeParserTest, ParsePattern)
+{
+ auto node = PatternTreeParser("?hello").parseTop();
+ auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get());
+
+ EXPECT_EQ(node.get(), patternNode);
+ EXPECT_EQ(patternNode->term, "?hello");
+ EXPECT_TRUE(patternNode->arguments.empty());
+ EXPECT_FALSE(patternNode->haveArgumentList);
+}
+
+TEST(TreeParserTest, ParseWithEmptyArgs)
+{
+ auto node = PatternTreeParser("?hello()").parseTop();
+ auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get());
+
+ EXPECT_EQ(node.get(), patternNode);
+ EXPECT_EQ(patternNode->term, "?hello");
+ EXPECT_TRUE(patternNode->arguments.empty());
+ EXPECT_TRUE(patternNode->haveArgumentList);
+}
+
+TEST(TreeParserTest, ParseWithOneArgs)
+{
+ auto node = PatternTreeParser("?hello(foo)").parseTop();
+ auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get());
+
+ EXPECT_EQ(node.get(), patternNode);
+ EXPECT_EQ(patternNode->term, "?hello");
+ EXPECT_EQ(1u, patternNode->arguments.size());
+}
+
+TEST(TreeParserTest, ParseWithManyArgs)
+{
+ auto node = PatternTreeParser("?hello(foo,bar)").parseTop();
+ auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get());
+
+ EXPECT_EQ(node.get(), patternNode);
+ EXPECT_EQ(patternNode->term, "?hello");
+ EXPECT_EQ(2u, patternNode->arguments.size());
+}
+
+TEST(TreeParserTest, ParseWithManyArgsWithSpaces)
+{
+ auto node = PatternTreeParser("?hello (foo, bar)").parseTop();
+ auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get());
+
+ EXPECT_EQ(node.get(), patternNode);
+ EXPECT_EQ(patternNode->term, "?hello");
+ EXPECT_EQ(2u, patternNode->arguments.size());
+}
+
+TEST(TreeParserTest, ParseWithManyArgsWithSpacesWithTrailingComma)
+{
+ auto node = PatternTreeParser("?hello (foo, bar,)").parseTop();
+ auto patternNode = dynamic_cast<PatternTreeParser::PatternNode *>(node.get());
+
+ EXPECT_EQ(node.get(), patternNode);
+ EXPECT_EQ(patternNode->term, "?hello");
+ EXPECT_EQ(2u, patternNode->arguments.size());
+}