Skip to content

Commit

Permalink
Keyword search is not working in System Logs
Browse files Browse the repository at this point in the history
- keyword parser refactoring
- changed to lowercase
- added uve & object-log

Closes-bug: 1532363

Change-Id: Ic9f15effa1a5ea14642276b3fc2af65dfd21b59b
(cherry picked from commit dd98136)

Support for prefix match for all index fields in the system/object log
queries

This patch adds support for prefix match on key fields such as Source,
ModuleId, Messagetype and Category in the WHERE clause of system and
object log queries.
contrail-logs is also updated to support prefix match queries for
--source, --message-type and --category.

Change-Id: I7afabe51a19766802981455fc7ebe3bb2b156acc
Partial-Bug: #1566449
(cherry picked from commit fe6c251)
  • Loading branch information
tedghose authored and Sundaresan Rajangam committed Apr 6, 2016
1 parent 31aa712 commit 16b99f3
Show file tree
Hide file tree
Showing 10 changed files with 427 additions and 132 deletions.
33 changes: 16 additions & 17 deletions src/analytics/db_handler.cc
Expand Up @@ -661,30 +661,29 @@ void DbHandler::MessageTableInsert(const VizMsg *vmsgp) {
message_type, vmsgp->unm, "");

const SandeshType::type &stype(header.get_Type());
std::string s;

if (stype == SandeshType::SYSTEM) {
LineParser::WordListType words;
if (stype == SandeshType::SYSTEM || stype == SandeshType::UVE ||
stype == SandeshType::OBJECT) {
const SandeshXMLMessage *sxmsg =
static_cast<const SandeshXMLMessage *>(vmsgp->msg);
const pugi::xml_node &parent(sxmsg->GetMessageNode());
s = LineParser::GetXmlString(parent);
words = LineParser::ParseXML(sxmsg->GetMessageNode(), false);
} else if (!vmsgp->keyword_doc_.empty()) {
std::string s;
s = std::string(vmsgp->keyword_doc_);
}
if (!s.empty()) {
LineParser::WordListType words = LineParser::ParseDoc(s.begin(),
s.end());
LineParser::RemoveStopWords(&words);
for (LineParser::WordListType::iterator i = words.begin();
i != words.end(); i++) {
// tableinsert@{(t2,*i), (t1,header.get_Source())} -> vmsgp->unm
bool r = MessageIndexTableInsert(
g_viz_constants.MESSAGE_TABLE_KEYWORD, header,
message_type, vmsgp->unm, *i);
if (!r)
DB_LOG(ERROR, "Failed to parse:" << s);
if (!s.empty()) {
words = LineParser::Parse(s);
}
}
for (LineParser::WordListType::iterator i = words.begin();
i != words.end(); i++) {
// tableinsert@{(t2,*i), (t1,header.get_Source())} -> vmsgp->unm
bool r = MessageIndexTableInsert(
g_viz_constants.MESSAGE_TABLE_KEYWORD, header,
message_type, vmsgp->unm, *i);
if (!r)
DB_LOG(ERROR, "Failed to parse:");
}

/*
* Insert the message types,module_id in the stat table
Expand Down
192 changes: 141 additions & 51 deletions src/analytics/parser_util.cc
@@ -1,99 +1,189 @@
/*
* Copyright (c) 2014 Juniper Networks, Inc. All rights reserved.
*/
#define BOOST_SPIRIT_DEBUG
#include "boost/spirit/include/classic.hpp"
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/qi_repeat.hpp>

#include <boost/assign/list_of.hpp>

#include <boost/algorithm/string/case_conv.hpp>

#include <iostream>
#include "parser_util.h"

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace phx = boost::phoenix;
using namespace BOOST_SPIRIT_CLASSIC_NS;



/*
 * Collect keywords from the attribute values of a single XML node.
 *
 * Every attribute value of `node` is converted to lower case and, when
 * non-empty, tokenized with ParseDoc(); the resulting words are merged
 * into `*words`. Attribute names are not examined.
 */
void
LineParser::GetAtrributes(const pugi::xml_node &node,
        LineParser::WordListType *words)
{
    pugi::xml_attribute attr = node.first_attribute();
    while (attr) {
        std::string lowered(attr.value());
        boost::algorithm::to_lower(lowered);
        if (!lowered.empty()) {
            LineParser::WordListType tokens =
                ParseDoc(lowered.begin(), lowered.end());
            words->insert(tokens.begin(), tokens.end());
        }
        attr = attr.next_attribute();
    }
}

/*
 * Recursively walk `node` and its descendants, gathering keywords.
 *
 *  - element nodes: attribute values are harvested through
 *    GetAtrributes(), but only when `check_attr` is set;
 *  - pcdata / cdata nodes: the node text is lower-cased and, when
 *    non-empty, tokenized with ParseDoc();
 *  - any other node type contributes nothing itself.
 *
 * Children are always visited, whatever the type of `node`. All words
 * found are merged into `*words`.
 */
void
LineParser::Travarse(const pugi::xml_node &node,
        LineParser::WordListType *words, bool check_attr)
{
    switch (node.type()) {
    case pugi::node_element:
        if (check_attr) {
            GetAtrributes(node, words);
        }
        break;

    case pugi::node_pcdata:
    case pugi::node_cdata: {
        std::string text(node.value());
        boost::algorithm::to_lower(text);
        if (!text.empty()) {
            LineParser::WordListType tokens =
                ParseDoc(text.begin(), text.end());
            words->insert(tokens.begin(), tokens.end());
        }
        break;
    }

    default:
        break;
    }

    pugi::xml_node child = node.first_child();
    while (child) {
        Travarse(child, words, check_attr);
        child = child.next_sibling();
    }
}

/*
 * Build the keyword set for an XML fragment.
 *
 * When `check_attr` is set, the attributes of `node` are harvested
 * first. Then `node` and each of its following siblings are walked with
 * Travarse(), which descends into their children. The accumulated words
 * are returned by value.
 */
LineParser::WordListType
LineParser::ParseXML(const pugi::xml_node &node, bool check_attr)
{
    LineParser::WordListType keywords;

    if (check_attr) {
        GetAtrributes(node, &keywords);
    }

    pugi::xml_node cur = node;
    while (cur) {
        Travarse(cur, &keywords, check_attr);
        cur = cur.next_sibling();
    }
    return keywords;
}

/*
 * Tokenize a plain string into keywords.
 *
 * The input is lower-cased before being handed to ParseDoc(). `s` is
 * received by value, so it is lower-cased in place rather than copied
 * again.
 */
LineParser::WordListType
LineParser::Parse(std::string s) {
    boost::algorithm::to_lower(s);
    return ParseDoc(s.begin(), s.end());
}

/*
 * Qi grammar (named "msgskpr") whose start rule matches exactly one
 * character to be skipped: either ASCII whitespace or one of the
 * separator characters  . , ; : [ ] ( ) { }  tab  carriage-return.
 * ParseDoc() instantiates it as its skipper type.
 */
template<typename Iterator>
struct msg_skipper : public qi::grammar<Iterator> {
    qi::rule<Iterator> skip;

    msg_skipper() : msg_skipper::base_type(skip, "msgskpr") {
        // Both alternatives consume a single character, so their order
        // does not affect what gets skipped.
        skip = qi::char_(".,;:[](){}\t\r") | ascii::space;
    }
};

template <typename Iterator>
LineParser::WordListType
LineParser::ParseDoc(Iterator start, Iterator end) {
LineParser::ParseDoc(Iterator start, Iterator end)
{
using ascii::space;
using qi::char_;
using qi::double_;
using qi::int_;
using qi::lit;
using qi::_1;
using qi::lexeme;
using qi::debug;
using qi::on_error;
using qi::eps;
using qi::fail;
using phoenix::push_back;
using phoenix::ref;

qi::rule<Iterator, std::string(), ascii::space_type> num =
lexeme[ +(char_(L'0', L'1')) ];
qi::rule<Iterator, std::string(), ascii::space_type> word =
lexeme[ +(char_ - ' ' - ':' - ',') ];
qi::rule<Iterator, std::string(), ascii::space_type> word2 =
using phx::insert;
using phx::ref;
using boost::spirit::repeat;

typedef msg_skipper<Iterator> skipper_t;
skipper_t skpr;

qi::rule<Iterator, std::string(), skipper_t> num1 =
lexeme[ *char_("0-9") >> '.' >> +char_("0-9") ];
qi::rule<Iterator, std::string(), skipper_t> num2=
lexeme[ +char_("0-9") >> -lit('.') ];
qi::rule<Iterator, std::string(), skipper_t> hex1=
lexeme[ lit('0') >> lit('x') >> +char_("0-9A-Fa-f") ];
qi::rule<Iterator, std::string(), skipper_t> oct1=
lexeme[ lit('0') >> +char_("0-7") ];
qi::rule<Iterator, std::string(), skipper_t> word =
lexeme[ +(char_ - char_(" .,;:[](){}\t\r")) ];
qi::rule<Iterator, std::string(), skipper_t> word2 =
'\'' >> lexeme[ +(char_ - '\'') ] >> '\'';
qi::rule<Iterator, std::string(), ascii::space_type> word3 =
qi::rule<Iterator, std::string(), skipper_t> word3 =
'"' >> lexeme[ +(char_ - '"') ] >> '"';
qi::rule<Iterator, std::string(), ascii::space_type> word4 =
'(' >> lexeme[ +(char_ - ')') ] >> ')';
qi::rule<Iterator, std::string(), ascii::space_type> word5 =
'{' >> lexeme[ +(char_ - '}') ] >> '}';
qi::rule<Iterator, std::string(), ascii::space_type> word6 =
'[' >> lexeme[ +(char_ - ']') ] >> ']';
qi::rule<Iterator, std::string(), ascii::space_type> ip =
qi::rule<Iterator, std::string(), skipper_t> uuid =
lexeme[ repeat(8)[char_("0-9a-fA-F")] >> char_('-') >>
repeat(3)[ repeat(4)[char_("0-9a-fA-F")] >> char_('-') ] >>
repeat(12)[char_("0-9a-fA-F")] ];
qi::rule<Iterator, std::string(), skipper_t> ip =
lexeme[ +char_(L'0', L'9') >> char_('.') >> +char_(L'0', L'9')
>> +(char_('.') >> +char_(L'0', L'9'))
>> -(char_('/') >> +char_(L'0', L'9'))];
qi::rule<Iterator, std::string(), ascii::space_type> ipv6 =
qi::rule<Iterator, std::string(), skipper_t> ipv6 =
lexeme[ +char_("0-9a-fA-F") >> +(+ char_(':')
>> +char_("0-9a-fA-F")) >> -(char_('/')
>> +char_(L'0', L'9'))];
qi::rule<Iterator, std::string(), ascii::space_type> stats =
qi::rule<Iterator, std::string(), skipper_t> stats =
lexeme[ +char_(L'0', L'9') >> +(char_('/') >> +char_(L'0', L'9'))];

qi::symbols<char, bool> stop_words;
stop_words.add
("via", true)("or", true)("of", true)
("string", true)("sandesh", true)("client", true)
("the", true)("that", true)("and", true);
WordListType v;
qi::rule<Iterator, std::string(), skipper_t> aw =
+( *( lit(":")
| lit(",")
| lit(".")
| lit(";")
| lit("&")
) >>
( stop_words
| stats [insert(ref(v), _1)]
| word2 [insert(ref(v), _1)]
| word3 [insert(ref(v), _1)]
| uuid [insert(ref(v), _1)]
| ip [insert(ref(v), _1)]
| ipv6 [insert(ref(v), _1)]
| hex1
| oct1
| num1
| num2
| word [insert(ref(v), _1)]
)
);

bool r = qi::phrase_parse(start, end,
// Begin grammer
+( *(lit(":")) >>
( word2 [push_back(ref(v), _1)]
| word3 [push_back(ref(v), _1)]
| word4 [push_back(ref(v), _1)]
| word5 [push_back(ref(v), _1)]
| word6 [push_back(ref(v), _1)]
| ip [push_back(ref(v), _1)]
| ipv6 [push_back(ref(v), _1)]
| stats [push_back(ref(v), _1)]
| double_
| int_
| word [push_back(ref(v), _1)]
)
)
*aw
// end grammer
, space);
BOOST_SPIRIT_DEBUG_NODES((word)(stats)(ipv6)(ip)(word2));
, skpr);
BOOST_SPIRIT_DEBUG_NODES((word)(num1)(aw)(word2));
BOOST_SPIRIT_DEBUG_RULE(word);
BOOST_SPIRIT_DEBUG_RULE(word2);
BOOST_SPIRIT_DEBUG_RULE(aw);
if ((start == end) && r)
return v;
else
return WordListType();
std::cout << "failed " << r << " " << (start == end) << "\n";
return v;
}

void
LineParser::RemoveStopWords(WordListType *v) {
WordListType::iterator i = v->begin();
while (i != v->end()) {
if (stop_words_.find(*i) == stop_words_.end())
i++;
else
i = v->erase(i);
}
//WordListType::iterator i = v->begin();
//while (i != v->end()) {
//if (stop_words_.find(*i) == stop_words_.end())
// i++;
//else
// i = v->erase(i);
//}
}

std::string
Expand Down Expand Up @@ -128,7 +218,7 @@ std::map<std::string, bool> LineParser::stop_words_ =
void
TemplateGen() {
std::string s("hello");
LineParser::WordListType words = LineParser::ParseDoc(s.begin(), s.end());
LineParser::WordListType words = LineParser::Parse(s);
std::cout << "result length: " << words.size() << std::endl;
}

14 changes: 10 additions & 4 deletions src/analytics/parser_util.h
Expand Up @@ -4,7 +4,7 @@
#ifndef __PARSER_UTIL__
#define __PARSER_UTIL__
#include <string>
#include <vector>
#include <set>
#include <map>

#include <pugixml/pugixml.hpp>
Expand All @@ -13,14 +13,20 @@ class LineParser
{
public:

typedef std::vector<std::string> WordListType;
typedef std::set<std::string> WordListType;

template <typename Iterator>
static WordListType ParseDoc(Iterator start, Iterator end);
static WordListType Parse(std::string s);
static WordListType ParseXML(const pugi::xml_node &node,
bool check_attr=true);
static void RemoveStopWords(WordListType *v);
static std::string GetXmlString(const pugi::xml_node node);
static std::string MakeSane(const std::string &text);
private:
template <typename Iterator>
static WordListType ParseDoc(Iterator start, Iterator end);
static void Travarse(const pugi::xml_node &node, WordListType *words,
bool check_attr=true);
static void GetAtrributes(const pugi::xml_node &node, WordListType *words);
static std::map<std::string, bool> stop_words_;
};

Expand Down
2 changes: 1 addition & 1 deletion src/analytics/test/db_handler_test.cc
Expand Up @@ -663,7 +663,7 @@ TEST_F(DbHandlerTest, DISABLED_MessageTableInsertTestCql) {
g_viz_constants.MESSAGE_TABLE_KEYWORD),
_,
_))))
.Times(2)
.Times(3)
.WillRepeatedly(Return(true));

EXPECT_CALL(*dbif_mock(),
Expand Down

0 comments on commit 16b99f3

Please sign in to comment.