Skip to content

Commit

Permalink
Keyword search is not working in System Logs
Browse files Browse the repository at this point in the history
- keyword parser refactoring
- changed to lowercase
- added uve & object-log

Closes-bug: 1532363

Change-Id: Ic9f15effa1a5ea14642276b3fc2af65dfd21b59b
  • Loading branch information
tedghose committed Mar 29, 2016
1 parent fd4f550 commit dd98136
Show file tree
Hide file tree
Showing 6 changed files with 316 additions and 76 deletions.
33 changes: 16 additions & 17 deletions src/analytics/db_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -661,30 +661,29 @@ void DbHandler::MessageTableInsert(const VizMsg *vmsgp) {
message_type, vmsgp->unm, "");

const SandeshType::type &stype(header.get_Type());
std::string s;

if (stype == SandeshType::SYSTEM) {
LineParser::WordListType words;
if (stype == SandeshType::SYSTEM || stype == SandeshType::UVE ||
stype == SandeshType::OBJECT) {
const SandeshXMLMessage *sxmsg =
static_cast<const SandeshXMLMessage *>(vmsgp->msg);
const pugi::xml_node &parent(sxmsg->GetMessageNode());
s = LineParser::GetXmlString(parent);
words = LineParser::ParseXML(sxmsg->GetMessageNode(), false);
} else if (!vmsgp->keyword_doc_.empty()) {
std::string s;
s = std::string(vmsgp->keyword_doc_);
}
if (!s.empty()) {
LineParser::WordListType words = LineParser::ParseDoc(s.begin(),
s.end());
LineParser::RemoveStopWords(&words);
for (LineParser::WordListType::iterator i = words.begin();
i != words.end(); i++) {
// tableinsert@{(t2,*i), (t1,header.get_Source())} -> vmsgp->unm
bool r = MessageIndexTableInsert(
g_viz_constants.MESSAGE_TABLE_KEYWORD, header,
message_type, vmsgp->unm, *i);
if (!r)
DB_LOG(ERROR, "Failed to parse:" << s);
if (!s.empty()) {
words = LineParser::Parse(s);
}
}
for (LineParser::WordListType::iterator i = words.begin();
i != words.end(); i++) {
// tableinsert@{(t2,*i), (t1,header.get_Source())} -> vmsgp->unm
bool r = MessageIndexTableInsert(
g_viz_constants.MESSAGE_TABLE_KEYWORD, header,
message_type, vmsgp->unm, *i);
if (!r)
DB_LOG(ERROR, "Failed to parse:");
}

/*
* Insert the message types,module_id in the stat table
Expand Down
192 changes: 141 additions & 51 deletions src/analytics/parser_util.cc
Original file line number Diff line number Diff line change
@@ -1,99 +1,189 @@
/*
* Copyright (c) 2014 Juniper Networks, Inc. All rights reserved.
*/
#define BOOST_SPIRIT_DEBUG
#include "boost/spirit/include/classic.hpp"
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/qi_repeat.hpp>

#include <boost/assign/list_of.hpp>

#include <boost/algorithm/string/case_conv.hpp>

#include <iostream>
#include "parser_util.h"

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace phx = boost::phoenix;
using namespace BOOST_SPIRIT_CLASSIC_NS;



// Adds the keywords found in every attribute value of `node` to `*words`.
//
// Each non-empty attribute value is lower-cased and tokenised (via Parse)
// and the resulting words are merged into the caller's set. Attribute
// names are not examined, only their values.
void
LineParser::GetAtrributes(const pugi::xml_node &node,
        LineParser::WordListType *words)
{
    pugi::xml_attribute attr = node.first_attribute();
    while (attr) {
        const std::string value(attr.value());
        // Lower-casing never changes the length, so testing the raw value
        // for emptiness is the same as testing the lower-cased one.
        if (!value.empty()) {
            const LineParser::WordListType tokens = Parse(value);
            words->insert(tokens.begin(), tokens.end());
        }
        attr = attr.next_attribute();
    }
}

// Recursively walks the XML subtree rooted at `node`, merging keywords
// into `*words`.
//
//  - element nodes: attribute values are harvested only when `check_attr`
//    is set;
//  - pcdata / cdata nodes: the node text is lower-cased and tokenised;
//  - every other node type contributes nothing itself.
// In all cases the node's children are then visited with the same flags.
void
LineParser::Travarse(const pugi::xml_node &node,
        LineParser::WordListType *words, bool check_attr)
{
    switch (node.type()) {
    case pugi::node_element:
        if (check_attr) {
            GetAtrributes(node, words);
        }
        break;
    case pugi::node_pcdata:
    case pugi::node_cdata: {
        const std::string text(node.value());
        if (!text.empty()) {
            const LineParser::WordListType found = Parse(text);
            words->insert(found.begin(), found.end());
        }
        break;
    }
    default:
        break;
    }

    // Depth-first descent into each direct child.
    pugi::xml_node child = node.first_child();
    while (child) {
        Travarse(child, words, check_attr);
        child = child.next_sibling();
    }
}

// Builds the keyword set for `node` and for every sibling that follows it.
//
// `check_attr` selects whether attribute values are harvested in addition
// to text content. Returns the collected set of words.
LineParser::WordListType
LineParser::ParseXML(const pugi::xml_node &node, bool check_attr)
{
    LineParser::WordListType keywords;

    // NOTE(review): for an element node Travarse() below collects these same
    // attributes again; the set makes that harmless. Kept as-is because the
    // two paths may differ for non-element start nodes -- confirm before
    // removing.
    if (check_attr) {
        GetAtrributes(node, &keywords);
    }

    // Start at `node` itself, then continue through its following siblings.
    pugi::xml_node cur = node;
    while (cur) {
        Travarse(cur, &keywords, check_attr);
        cur = cur.next_sibling();
    }
    return keywords;
}

// Tokenises `s` into a set of keywords.
//
// The text is lower-cased first and then handed to ParseDoc, so the
// returned words are derived from the lower-cased input.
//
// `s` is taken by value, so it is already a private copy: lower-case it in
// place rather than allocating a second string with to_lower_copy().
LineParser::WordListType
LineParser::Parse(std::string s) {
    boost::algorithm::to_lower(s);
    return ParseDoc(s.begin(), s.end());
}

// Skip grammar registered under the name "msgskpr".
//
// A single skip step consumes either one ASCII whitespace character or one
// of the separator characters . , ; : [ ] ( ) { } (tab and carriage return
// are also listed explicitly in the character set).
template<typename Iterator>
struct msg_skipper : public qi::grammar<Iterator> {
    // Start rule of the grammar; it exposes no attribute.
    qi::rule<Iterator> skip;

    msg_skipper() : msg_skipper::base_type(skip, "msgskpr") {
        skip = qi::char_(".,;:[](){}\t\r") | ascii::space;
    }
};

template <typename Iterator>
LineParser::WordListType
LineParser::ParseDoc(Iterator start, Iterator end) {
LineParser::ParseDoc(Iterator start, Iterator end)
{
using ascii::space;
using qi::char_;
using qi::double_;
using qi::int_;
using qi::lit;
using qi::_1;
using qi::lexeme;
using qi::debug;
using qi::on_error;
using qi::eps;
using qi::fail;
using phoenix::push_back;
using phoenix::ref;

qi::rule<Iterator, std::string(), ascii::space_type> num =
lexeme[ +(char_(L'0', L'1')) ];
qi::rule<Iterator, std::string(), ascii::space_type> word =
lexeme[ +(char_ - ' ' - ':' - ',') ];
qi::rule<Iterator, std::string(), ascii::space_type> word2 =
using phx::insert;
using phx::ref;
using boost::spirit::repeat;

typedef msg_skipper<Iterator> skipper_t;
skipper_t skpr;

qi::rule<Iterator, std::string(), skipper_t> num1 =
lexeme[ *char_("0-9") >> '.' >> +char_("0-9") ];
qi::rule<Iterator, std::string(), skipper_t> num2=
lexeme[ +char_("0-9") >> -lit('.') ];
qi::rule<Iterator, std::string(), skipper_t> hex1=
lexeme[ lit('0') >> lit('x') >> +char_("0-9A-Fa-f") ];
qi::rule<Iterator, std::string(), skipper_t> oct1=
lexeme[ lit('0') >> +char_("0-7") ];
qi::rule<Iterator, std::string(), skipper_t> word =
lexeme[ +(char_ - char_(" .,;:[](){}\t\r")) ];
qi::rule<Iterator, std::string(), skipper_t> word2 =
'\'' >> lexeme[ +(char_ - '\'') ] >> '\'';
qi::rule<Iterator, std::string(), ascii::space_type> word3 =
qi::rule<Iterator, std::string(), skipper_t> word3 =
'"' >> lexeme[ +(char_ - '"') ] >> '"';
qi::rule<Iterator, std::string(), ascii::space_type> word4 =
'(' >> lexeme[ +(char_ - ')') ] >> ')';
qi::rule<Iterator, std::string(), ascii::space_type> word5 =
'{' >> lexeme[ +(char_ - '}') ] >> '}';
qi::rule<Iterator, std::string(), ascii::space_type> word6 =
'[' >> lexeme[ +(char_ - ']') ] >> ']';
qi::rule<Iterator, std::string(), ascii::space_type> ip =
qi::rule<Iterator, std::string(), skipper_t> uuid =
lexeme[ repeat(8)[char_("0-9a-fA-F")] >> char_('-') >>
repeat(3)[ repeat(4)[char_("0-9a-fA-F")] >> char_('-') ] >>
repeat(12)[char_("0-9a-fA-F")] ];
qi::rule<Iterator, std::string(), skipper_t> ip =
lexeme[ +char_(L'0', L'9') >> char_('.') >> +char_(L'0', L'9')
>> +(char_('.') >> +char_(L'0', L'9'))
>> -(char_('/') >> +char_(L'0', L'9'))];
qi::rule<Iterator, std::string(), ascii::space_type> ipv6 =
qi::rule<Iterator, std::string(), skipper_t> ipv6 =
lexeme[ +char_("0-9a-fA-F") >> +(+ char_(':')
>> +char_("0-9a-fA-F")) >> -(char_('/')
>> +char_(L'0', L'9'))];
qi::rule<Iterator, std::string(), ascii::space_type> stats =
qi::rule<Iterator, std::string(), skipper_t> stats =
lexeme[ +char_(L'0', L'9') >> +(char_('/') >> +char_(L'0', L'9'))];

qi::symbols<char, bool> stop_words;
stop_words.add
("via", true)("or", true)("of", true)
("string", true)("sandesh", true)("client", true)
("the", true)("that", true)("and", true);
WordListType v;
qi::rule<Iterator, std::string(), skipper_t> aw =
+( *( lit(":")
| lit(",")
| lit(".")
| lit(";")
| lit("&")
) >>
( stop_words
| stats [insert(ref(v), _1)]
| word2 [insert(ref(v), _1)]
| word3 [insert(ref(v), _1)]
| uuid [insert(ref(v), _1)]
| ip [insert(ref(v), _1)]
| ipv6 [insert(ref(v), _1)]
| hex1
| oct1
| num1
| num2
| word [insert(ref(v), _1)]
)
);

bool r = qi::phrase_parse(start, end,
// Begin grammar
+( *(lit(":")) >>
( word2 [push_back(ref(v), _1)]
| word3 [push_back(ref(v), _1)]
| word4 [push_back(ref(v), _1)]
| word5 [push_back(ref(v), _1)]
| word6 [push_back(ref(v), _1)]
| ip [push_back(ref(v), _1)]
| ipv6 [push_back(ref(v), _1)]
| stats [push_back(ref(v), _1)]
| double_
| int_
| word [push_back(ref(v), _1)]
)
)
*aw
// end grammar
, space);
BOOST_SPIRIT_DEBUG_NODES((word)(stats)(ipv6)(ip)(word2));
, skpr);
BOOST_SPIRIT_DEBUG_NODES((word)(num1)(aw)(word2));
BOOST_SPIRIT_DEBUG_RULE(word);
BOOST_SPIRIT_DEBUG_RULE(word2);
BOOST_SPIRIT_DEBUG_RULE(aw);
if ((start == end) && r)
return v;
else
return WordListType();
std::cout << "failed " << r << " " << (start == end) << "\n";
return v;
}

void
LineParser::RemoveStopWords(WordListType *v) {
WordListType::iterator i = v->begin();
while (i != v->end()) {
if (stop_words_.find(*i) == stop_words_.end())
i++;
else
i = v->erase(i);
}
//WordListType::iterator i = v->begin();
//while (i != v->end()) {
//if (stop_words_.find(*i) == stop_words_.end())
// i++;
//else
// i = v->erase(i);
//}
}

std::string
Expand Down Expand Up @@ -128,7 +218,7 @@ std::map<std::string, bool> LineParser::stop_words_ =
void
TemplateGen() {
std::string s("hello");
LineParser::WordListType words = LineParser::ParseDoc(s.begin(), s.end());
LineParser::WordListType words = LineParser::Parse(s);
std::cout << "result length: " << words.size() << std::endl;
}

14 changes: 10 additions & 4 deletions src/analytics/parser_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#ifndef __PARSER_UTIL__
#define __PARSER_UTIL__
#include <string>
#include <vector>
#include <set>
#include <map>

#include <pugixml/pugixml.hpp>
Expand All @@ -13,14 +13,20 @@ class LineParser
{
public:

typedef std::vector<std::string> WordListType;
typedef std::set<std::string> WordListType;

template <typename Iterator>
static WordListType ParseDoc(Iterator start, Iterator end);
static WordListType Parse(std::string s);
static WordListType ParseXML(const pugi::xml_node &node,
bool check_attr=true);
static void RemoveStopWords(WordListType *v);
static std::string GetXmlString(const pugi::xml_node node);
static std::string MakeSane(const std::string &text);
private:
template <typename Iterator>
static WordListType ParseDoc(Iterator start, Iterator end);
static void Travarse(const pugi::xml_node &node, WordListType *words,
bool check_attr=true);
static void GetAtrributes(const pugi::xml_node &node, WordListType *words);
static std::map<std::string, bool> stop_words_;
};

Expand Down
2 changes: 1 addition & 1 deletion src/analytics/test/db_handler_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ TEST_F(DbHandlerTest, MessageTableInsertTest) {
g_viz_constants.MESSAGE_TABLE_KEYWORD),
_,
_))))
.Times(2)
.Times(3)
.WillRepeatedly(Return(true));

EXPECT_CALL(*dbif_mock(),
Expand Down

0 comments on commit dd98136

Please sign in to comment.