diff --git a/src/ifmap/ifmap_graph_walker.cc b/src/ifmap/ifmap_graph_walker.cc index 5004a77b3dd..82c8a8d3783 100644 --- a/src/ifmap/ifmap_graph_walker.cc +++ b/src/ifmap/ifmap_graph_walker.cc @@ -371,6 +371,7 @@ void IFMapGraphWalker::AddNodesToWhitelist() { traversal_white_list_->include_vertex.insert("loadbalancer-healthmonitor"); traversal_white_list_->include_vertex.insert("subnet"); traversal_white_list_->include_vertex.insert("service-health-check"); + traversal_white_list_->include_vertex.insert("bgp-as-a-service"); } void IFMapGraphWalker::AddLinksToWhitelist() { @@ -476,5 +477,11 @@ void IFMapGraphWalker::AddLinksToWhitelist() { "source=floating-ip-pool,target=virtual-network"); traversal_white_list_->include_edge.insert( "source=virtual-machine-interface,target=service-health-check"); + traversal_white_list_->include_edge.insert( + "source=virtual-machine-interface,target=bgp-as-a-service"); + traversal_white_list_->include_edge.insert( + "source=bgp-as-a-service,target=bgp-router"); + traversal_white_list_->include_edge.insert( + "source=bgp-router,target=routing-instance"); } diff --git a/src/vnsw/agent/contrail-vrouter-agent.conf b/src/vnsw/agent/contrail-vrouter-agent.conf index 9e5bc4dbbe7..21a215db88f 100644 --- a/src/vnsw/agent/contrail-vrouter-agent.conf +++ b/src/vnsw/agent/contrail-vrouter-agent.conf @@ -199,3 +199,7 @@ docker_command=/usr/bin/opencontrail-vrouter-docker # # Log message if time taken to schedule task exceeds a threshold (in msec) # log_schedule_threshold = 25 + +[SERVICES] +# bgp_as_a_service_port_range - reserving set of ports to be used. 
+# bgp_as_a_service_port_range=30000-35000 diff --git a/src/vnsw/agent/controller/controller_init.cc b/src/vnsw/agent/controller/controller_init.cc index 14cbdc97eb4..b2aecac1b8a 100644 --- a/src/vnsw/agent/controller/controller_init.cc +++ b/src/vnsw/agent/controller/controller_init.cc @@ -47,7 +47,7 @@ VNController::VNController(Agent *agent) work_queue_(agent->task_scheduler()->GetTaskId("Agent::ControllerXmpp"), 0, boost::bind(&VNController::ControllerWorkQueueProcess, this, _1)), - fabric_multicast_label_range_() { + fabric_multicast_label_range_(), xmpp_channel_down_cb_() { work_queue_.set_name("Controller Queue"); decommissioned_peer_list_.clear(); } @@ -302,7 +302,8 @@ void VNController::DnsXmppServerDisConnect() { //If not agent never got a channel down state and is being removed //as it is not part of discovery list. //Artificially inject NOT_READY in agent xmpp channel. -void VNController::DeleteAgentXmppChannel(AgentXmppChannel *channel) { +void VNController::DeleteAgentXmppChannel(uint8_t idx) { + AgentXmppChannel *channel = agent_->controller_xmpp_channel(idx); if (!channel) return; @@ -315,6 +316,9 @@ void VNController::DeleteAgentXmppChannel(AgentXmppChannel *channel) { AgentXmppChannel::HandleAgentXmppClientChannelEvent(channel, xmps::NOT_READY); } + //Every delete of channel should delete flow of bgp-as-a-service, + //which is using this CN. 
+    //The listener is optional. Invoking an empty boost::function throws
+    //boost::bad_function_call, so notify only when a callback is registered.
+    if (xmpp_channel_down_cb_.empty() == false) {
+        xmpp_channel_down_cb_(idx);
+    }
 }
 
 //Trigger shutdown and cleanup of routes for the client
@@ -386,7 +390,7 @@ void VNController::DisConnectControllerIfmapServer(uint8_t idx) {
     agent_->set_controller_ifmap_xmpp_client(NULL, idx);
 
     //cleanup AgentXmppChannel
-    DeleteAgentXmppChannel(agent_->controller_xmpp_channel(idx));
+    DeleteAgentXmppChannel(idx);
     agent_->reset_controller_xmpp_channel(idx);
 
     //cleanup AgentIfmapXmppChannel
diff --git a/src/vnsw/agent/controller/controller_init.h b/src/vnsw/agent/controller/controller_init.h
index a7bb43d9bd4..c5421bff1a7 100644
--- a/src/vnsw/agent/controller/controller_init.h
+++ b/src/vnsw/agent/controller/controller_init.h
@@ -77,6 +77,7 @@ class ControllerDiscoveryData : public ControllerWorkQueueData {
 
 class VNController {
 public:
+    typedef boost::function XmppChannelDownCb;
     typedef boost::shared_ptr ControllerXmppDataType;
     typedef boost::shared_ptr ControllerDeletePeerDataType;
     typedef boost::shared_ptr ControllerWorkQueueDataType;
@@ -154,7 +155,7 @@ class VNController {
     bool XmppMessageProcess(ControllerXmppDataType data);
     Agent *agent() {return agent_;}
     void Enqueue(ControllerWorkQueueDataType data);
-    void DeleteAgentXmppChannel(AgentXmppChannel *ch);
+    void DeleteAgentXmppChannel(uint8_t idx);
     void SetAgentMcastLabelRange(uint8_t idx);
     void FillMcastLabelRange(uint32_t *star_idx,
                              uint32_t *end_idx,
@@ -162,6 +163,9 @@ class VNController {
     const FabricMulticastLabelRange &fabric_multicast_label_range(uint8_t idx) const {
         return fabric_multicast_label_range_[idx];
     }
+    void RegisterControllerChangeCallback(XmppChannelDownCb xmpp_channel_down_cb) {
+        xmpp_channel_down_cb_ = xmpp_channel_down_cb;
+    }
 
 private:
     AgentXmppChannel *FindAgentXmppChannel(const std::string &server_ip);
@@ -181,6 +185,7 @@ class VNController {
     ConfigCleanupTimer config_cleanup_timer_;
     WorkQueue work_queue_;
     FabricMulticastLabelRange fabric_multicast_label_range_[MAX_XMPP_SERVERS];
+    XmppChannelDownCb xmpp_channel_down_cb_;
 };
 
 extern
SandeshTraceBufferPtr ControllerInfoTraceBuf;
diff --git a/src/vnsw/agent/init/agent_param.cc b/src/vnsw/agent/init/agent_param.cc
index f52b833194b..dd51f68c09b 100644
--- a/src/vnsw/agent/init/agent_param.cc
+++ b/src/vnsw/agent/init/agent_param.cc
@@ -577,6 +577,11 @@ void AgentParam::ParseNexthopServer() {
     }
 }
 
+// Store the "SERVICES.bgp_as_a_service_port_range" value obtained through
+// GetValueFromTree in bgp_as_a_service_port_range_. Called from
+// InitFromConfig().
+void AgentParam::ParseBgpAsAServicePortRange() {
+    GetValueFromTree(bgp_as_a_service_port_range_,
+                     "SERVICES.bgp_as_a_service_port_range");
+}
+
 void AgentParam::ParseCollectorArguments
     (const boost::program_options::variables_map &var_map) {
     GetOptValue< vector >(var_map, collector_server_list_,
@@ -792,6 +797,12 @@ void AgentParam::ParsePlatformArguments
     }
 }
 
+// Store the "SERVICES.bgp_as_a_service_port_range" option obtained from the
+// variables_map v through GetOptValue in bgp_as_a_service_port_range_.
+// Called from InitFromArguments().
+void AgentParam::ParseBgpAsAServicePortRangeArguments
+    (const boost::program_options::variables_map &v) {
+    GetOptValue(v, bgp_as_a_service_port_range_,
+                "SERVICES.bgp_as_a_service_port_range");
+}
+
 // Initialize hypervisor mode based on system information
 // If "/proc/xen" exists it means we are running in Xen dom0
 void AgentParam::InitFromSystem() {
@@ -840,6 +851,7 @@ void AgentParam::InitFromConfig() {
     ParseAgentInfo();
     ParseNexthopServer();
     ParsePlatform();
+    ParseBgpAsAServicePortRange();
     cout << "Config file <" << config_file_ << "> parsing completed.\n";
     return;
 }
@@ -863,6 +875,7 @@ void AgentParam::InitFromArguments() {
     ParseAgentInfoArguments(var_map_);
     ParseNexthopServerArguments(var_map_);
     ParsePlatformArguments(var_map_);
+    ParseBgpAsAServicePortRangeArguments(var_map_);
     return;
 }
 
@@ -1085,6 +1098,7 @@ void AgentParam::LogConfig() const {
     LOG(DEBUG, "Service instance workers : " << si_netns_workers_);
     LOG(DEBUG, "Service instance timeout : " << si_netns_timeout_);
     LOG(DEBUG, "Service instance lb ssl : " << si_lb_ssl_cert_path_);
+    LOG(DEBUG, "Bgp as a service port range : " << bgp_as_a_service_port_range_);
     if (hypervisor_mode_ == MODE_KVM) {
     LOG(DEBUG, "Hypervisor mode : kvm");
         return;
diff --git a/src/vnsw/agent/init/agent_param.h b/src/vnsw/agent/init/agent_param.h
index
4dff2dff72c..c3ea96c977b 100644 --- a/src/vnsw/agent/init/agent_param.h +++ b/src/vnsw/agent/init/agent_param.h @@ -225,6 +225,9 @@ class AgentParam { } std::string agent_base_dir() const { return agent_base_dir_; } uint32_t sandesh_send_rate_limit() { return send_ratelimit_; } + const std::string &bgp_as_a_service_port_range() const { + return bgp_as_a_service_port_range_; + } uint16_t flow_thread_count() const { return flow_thread_count_; } void set_flow_thread_count(uint16_t count) { flow_thread_count_ = count; } @@ -232,6 +235,7 @@ class AgentParam { uint32_t tbb_thread_count() const { return tbb_thread_count_; } uint32_t tbb_exec_delay() const { return tbb_exec_delay_; } uint32_t tbb_schedule_delay() const { return tbb_schedule_delay_; } + protected: void set_hypervisor_mode(HypervisorMode m) { hypervisor_mode_ = m; } virtual void InitFromSystem(); @@ -305,6 +309,7 @@ class AgentParam { void ParseAgentInfo(); void ParseNexthopServer(); void ParsePlatform(); + void ParseBgpAsAServicePortRange(); void set_agent_mode(const std::string &mode); void ParseCollectorArguments @@ -337,6 +342,8 @@ class AgentParam { (const boost::program_options::variables_map &v); void ParsePlatformArguments (const boost::program_options::variables_map &v); + void ParseBgpAsAServicePortRangeArguments + (const boost::program_options::variables_map &v); boost::program_options::variables_map var_map_; boost::program_options::options_description options_; @@ -427,6 +434,7 @@ class AgentParam { uint32_t send_ratelimit_; uint16_t flow_thread_count_; bool subnet_hosts_resolvable_; + std::string bgp_as_a_service_port_range_; // TBB related uint32_t tbb_thread_count_; diff --git a/src/vnsw/agent/oper/SConscript b/src/vnsw/agent/oper/SConscript index add3e51be50..cb126e25156 100644 --- a/src/vnsw/agent/oper/SConscript +++ b/src/vnsw/agent/oper/SConscript @@ -33,6 +33,7 @@ vnswoperdb = env.Library('vnswoperdb', 'agent_route.cc', 'agent_route_resync.cc', 'agent_route_walker.cc', + 
'bgp_as_service.cc', 'bridge_route.cc', 'config_manager.cc', 'evpn_route.cc', diff --git a/src/vnsw/agent/oper/agent.sandesh b/src/vnsw/agent/oper/agent.sandesh index 5038bfa98a3..4ae8a4e2b5c 100644 --- a/src/vnsw/agent/oper/agent.sandesh +++ b/src/vnsw/agent/oper/agent.sandesh @@ -1364,3 +1364,19 @@ response sandesh HealthCheckSandeshResp { 1: list hc_list; } +struct BgpAsAServiceSandeshList { + 1: string vm_bgp_peer_ip; + 2: i32 vm_nat_source_port; + 3: string vmi_uuid (link="ItfReq"); // intf uuid +} + +request sandesh BgpAsAServiceSandeshReq { +} + +response sandesh BgpAsAServiceSandeshResp { + 1: list bgp_as_a_service_list; +} + +trace sandesh BgpAsAServiceTrace { + 1: string message; +} diff --git a/src/vnsw/agent/oper/audit_list.h b/src/vnsw/agent/oper/audit_list.h new file mode 100644 index 00000000000..bb8b14b1ace --- /dev/null +++ b/src/vnsw/agent/oper/audit_list.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016 Juniper Networks, Inc. All rights reserved. + */ + +#ifndef vnsw_agent_audit_list_hpp +#define vnsw_agent_audit_list_hpp + +///////////////////////////////////////////////////////////////////////////// +// Template function to audit two lists. 
This is used to synchronize the
+// operational and config list for Floating-IP, Service-Vlans, Static Routes
+// and SG List
+//
+// The ranges [old_first, old_last) and [new_first, new_last) are walked in
+// step and compared with IsLess(), so both are expected to be ordered
+// consistently with IsLess():
+//   - element only in the old range -> list.Remove(iterator to old element)
+//   - element only in the new range -> list.Insert(pointer to new element)
+//   - element present in both       -> list.Update(old element, new element)
+// Each iterator is advanced before the callback runs on the saved copy.
+// Returns true if at least one Remove/Insert/Update call was made. Update()
+// counts as well, so the result is true whenever either range is non-empty.
+/////////////////////////////////////////////////////////////////////////////
+template
+bool AuditList(List &list, Iterator old_first, Iterator old_last,
+               Iterator new_first, Iterator new_last) {
+    bool ret = false;
+    Iterator old_iterator = old_first;
+    Iterator new_iterator = new_first;
+    // Merge-style walk over both ranges.
+    while (old_iterator != old_last && new_iterator != new_last) {
+        if (old_iterator->IsLess(new_iterator.operator->())) {
+            // Old entry has no counterpart in the new range.
+            Iterator bkp = old_iterator++;
+            list.Remove(bkp);
+            ret = true;
+        } else if (new_iterator->IsLess(old_iterator.operator->())) {
+            // New entry has no counterpart in the old range.
+            Iterator bkp = new_iterator++;
+            list.Insert(bkp.operator->());
+            ret = true;
+        } else {
+            // Neither is less than the other: same key on both sides.
+            Iterator old_bkp = old_iterator++;
+            Iterator new_bkp = new_iterator++;
+            list.Update(old_bkp.operator->(), new_bkp.operator->());
+            ret = true;
+        }
+    }
+
+    // Leftover old entries are all removals.
+    while (old_iterator != old_last) {
+        Iterator bkp = old_iterator++;
+        list.Remove(bkp);
+        ret = true;
+    }
+
+    // Leftover new entries are all insertions.
+    while (new_iterator != new_last) {
+        Iterator bkp = new_iterator++;
+        list.Insert(bkp.operator->());
+        ret = true;
+    }
+
+    return ret;
+}
+#endif
diff --git a/src/vnsw/agent/oper/bgp_as_service.cc b/src/vnsw/agent/oper/bgp_as_service.cc
new file mode 100644
index 00000000000..d54a0226684
--- /dev/null
+++ b/src/vnsw/agent/oper/bgp_as_service.cc
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016 Juniper Networks, Inc. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include "net/address_util.h"
+#include <unistd.h>
+
+using namespace std;
+SandeshTraceBufferPtr BgpAsAServiceTraceBuf(SandeshTraceBufferCreate
+                                            ("BgpAsAService", 500));
+
+//Builds the service and immediately reserves the configured port range.
+BgpAsAService::BgpAsAService(const Agent *agent) :
+    agent_(agent),
+    bgp_as_a_service_entry_map_(),
+    service_delete_cb_() {
+    BindBgpAsAServicePorts(agent->params()->bgp_as_a_service_port_range());
+}
+
+//The map owns the per-interface lists allocated in ProcessConfig, release
+//whatever DeleteVmInterface has not already freed.
+BgpAsAService::~BgpAsAService() {
+    for (BgpAsAServiceEntryMapIterator it =
+         bgp_as_a_service_entry_map_.begin();
+         it != bgp_as_a_service_entry_map_.end(); ++it) {
+        delete it->second;
+    }
+    bgp_as_a_service_entry_map_.clear();
+}
+
+//Bind a TCP socket on the router-id address for every port in port_range
+//("<start>-<end>"). Failures are traced per port and do not stop the loop.
+void BgpAsAService::BindBgpAsAServicePorts(const std::string &port_range) {
+    vector ports;
+    if (!stringToIntegerList(port_range, "-", ports) ||
+        ports.size() != 2) {
+        BGPASASERVICETRACE(Trace, "Port bind range rejected -"
+                           "parsing failed");
+        return;
+    }
+
+    uint32_t start = ports[0];
+    uint32_t end = ports[1];
+    //A TCP port is 16 bit wide. Anything larger would be truncated by
+    //htons() below, and end == UINT32_MAX would never terminate the loop.
+    if (start > end || end > 65535) {
+        BGPASASERVICETRACE(Trace, "Port bind range rejected - invalid range");
+        return;
+    }
+
+    for (uint32_t port = start; port <= end; port++) {
+        int port_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+        if (port_fd < 0) {
+            std::stringstream ss;
+            ss << "Port socket creation failed for port# ";
+            ss << port;
+            ss << " with error ";
+            ss << strerror(errno);
+            BGPASASERVICETRACE(Trace, ss.str().c_str());
+            continue;
+        }
+        struct sockaddr_in address;
+        //Zero fill, the character '0' would leave 0x30 in the padding.
+        memset(&address, 0, sizeof(address));
+        address.sin_family = AF_INET;
+        address.sin_addr.s_addr = htonl(agent_->router_id().to_ulong());
+        address.sin_port = htons(port);
+        int optval = 1;
+        if (fcntl(port_fd, F_SETFD, FD_CLOEXEC) < 0) {
+            std::stringstream ss;
+            ss << "Port setting fcntl failed with error ";
+            ss << strerror(errno);
+            ss << " for port ";
+            ss << port;
+            BGPASASERVICETRACE(Trace, ss.str().c_str());
+        }
+        setsockopt(port_fd, SOL_SOCKET, SO_REUSEADDR,
+                   &optval, sizeof(optval));
+        if (bind(port_fd, (struct sockaddr*) &address,
+                 sizeof(sockaddr_in)) < 0) {
+            std::stringstream ss;
+            ss << "Port bind failed for port# ";
+            ss << port;
+            ss << " with error ";
+            ss << strerror(errno);
+            BGPASASERVICETRACE(Trace, ss.str().c_str());
+            //Nothing got reserved, do not leak the descriptor.
+            close(port_fd);
+        }
+        //On success the descriptor is deliberately left open: holding the
+        //bound socket is what keeps the port away from the host.
+    }
+}
+
+const BgpAsAService::BgpAsAServiceEntryMap &BgpAsAService::bgp_as_a_service_map() const {
+    return bgp_as_a_service_entry_map_;
+}
+
+//AuditList hook: entry present only in the new config, add a copy.
+void BgpAsAService::BgpAsAServiceList::Insert(const BgpAsAServiceEntry *rhs) {
+    list_.insert(*rhs);
+}
+
+//AuditList hook: entry present in old and new config, nothing to refresh.
+void BgpAsAService::BgpAsAServiceList::Update(const BgpAsAServiceEntry *lhs,
+                                              const BgpAsAServiceEntry *rhs) {
+}
+
+//AuditList hook: entry vanished from config. It is only marked here, the
+//erase happens in ProcessConfig once the audit is complete.
+void BgpAsAService::BgpAsAServiceList::Remove(BgpAsAServiceEntryListIterator &it) {
+    it->set_del_pending(true);
+}
+
+void BgpAsAService::BgpAsAServiceList::Flush() {
+    list_.clear();
+}
+
+//Name of the first adjacent node of the bgp-router that is not skipped for
+//the VRF config table, empty string if there is none.
+static const std::string GetBgpRouterVrfName(const Agent *agent,
+                                             IFMapNode *node) {
+    IFMapAgentTable *table = static_cast(node->table());
+    for (DBGraphVertex::adjacency_iterator it = node->begin(table->GetGraph());
+         it != node->end(table->GetGraph()); ++it) {
+        IFMapNode *vrf_node = static_cast(it.operator->());
+        if (agent->config_manager()->SkipNode
+            (vrf_node, agent->cfg()->cfg_vrf_table())) {
+            continue;
+        }
+        return vrf_node->name();
+    }
+    return std::string();
+}
+
+//Add one (peer ip, source port) entry to new_list for every bgp-router
+//linked to bgp_as_a_service_node whose VRF equals vm_vrf_name.
+void BgpAsAService::BuildBgpAsAServiceInfo(IFMapNode *bgp_as_a_service_node,
+                                           BgpAsAServiceEntryList &new_list,
+                                           const std::string &vm_vrf_name) {
+    IFMapAgentTable *table =
+        static_cast(bgp_as_a_service_node->table());
+    autogen::BgpAsAService *bgp_as_a_service =
+        dynamic_cast(bgp_as_a_service_node->GetObject());
+    assert(bgp_as_a_service);
+    boost::system::error_code ec;
+    IpAddress local_peer_ip =
+        IpAddress::from_string(bgp_as_a_service->bgpaas_ip_address(), ec);
+    if (ec.value() != 0) {
+        std::stringstream ss;
+        ss << "Ip address parsing failed for ";
+        ss << bgp_as_a_service->bgpaas_ip_address();
+        BGPASASERVICETRACE(Trace, ss.str().c_str());
+        return;
+    }
+
+    //Look for neighbour bgp-router to take the source port
+
+    for (DBGraphVertex::adjacency_iterator it = bgp_as_a_service_node->begin(table->GetGraph());
+         it != bgp_as_a_service_node->end(table->GetGraph()); ++it) {
+        IFMapNode *adj_node = static_cast(it.operator->());
+        if (agent_->config_manager()->SkipNode(adj_node)) {
+            continue;
+        }
+        if (strcmp(adj_node->table()->Typename(), BGP_ROUTER_CONFIG_NAME) == 0) {
+            autogen::BgpRouter *bgp_router=
+                dynamic_cast(adj_node->GetObject());
+            //The cast yields NULL when the node carries no bgp-router
+            //object, skip it instead of dereferencing.
+            if (bgp_router == NULL)
+                continue;
+            const std::string &vrf_name =
+                GetBgpRouterVrfName(agent_, adj_node);
+            if (vrf_name.empty() || (vrf_name != vm_vrf_name))
+                continue; //Skip the node with no VRF, notification will come.
+            new_list.insert(BgpAsAServiceEntry(local_peer_ip,
+                                               bgp_router->parameters().source_port));
+        }
+    }
+}
+
+//Rebuild the entry list of interface vm_uuid from its bgp-as-a-service
+//nodes and audit it against the stored one. Entries that disappeared are
+//reported through service_delete_cb_ (when registered) and erased.
+void BgpAsAService::ProcessConfig(const std::string &vrf_name,
+                                  std::list &node_map,
+                                  const boost::uuids::uuid &vm_uuid) {
+    std::list::const_iterator it =
+        node_map.begin();
+    BgpAsAServiceEntryList new_bgp_as_a_service_entry_list;
+    while (it != node_map.end()) {
+        BuildBgpAsAServiceInfo(*it, new_bgp_as_a_service_entry_list,
+                               vrf_name);
+        it++;
+    }
+
+    //Audit and enqueue updates/deletes of flow
+    BgpAsAServiceEntryMapIterator old_bgp_as_a_service_entry_list_iter =
+        bgp_as_a_service_entry_map_.find(vm_uuid);
+    bool changed = false;
+    if (old_bgp_as_a_service_entry_list_iter !=
+        bgp_as_a_service_entry_map_.end()) {
+        //Audit
+        changed = AuditList
+            (*(old_bgp_as_a_service_entry_list_iter->second),
+             old_bgp_as_a_service_entry_list_iter->second->list_.begin(),
+             old_bgp_as_a_service_entry_list_iter->second->list_.end(),
+             new_bgp_as_a_service_entry_list.begin(),
+             new_bgp_as_a_service_entry_list.end());
+    } else if (new_bgp_as_a_service_entry_list.size() != 0) {
+        bgp_as_a_service_entry_map_[vm_uuid] =
+            new BgpAsAServiceList(new_bgp_as_a_service_entry_list);
+    }
+
+    //changed is only set when an old list was audited, so the iterator is
+    //valid here. Entries marked by the audit are erased even when nobody
+    //registered a callback, otherwise they would linger in the list forever.
+    if (changed) {
+        BgpAsAServiceEntryList &old_list =
+            old_bgp_as_a_service_entry_list_iter->second->list_;
+        BgpAsAServiceEntryListIterator deleted_list_iter = old_list.begin();
+        while (deleted_list_iter != old_list.end()) {
+            //Advance first, erase invalidates the erased position only.
+            BgpAsAServiceEntryListIterator prev = deleted_list_iter++;
+            if (prev->del_pending_) {
+                if (service_delete_cb_) {
+                    //Enqueue flow handler request.
+                    service_delete_cb_(vm_uuid, prev->source_port_);
+                }
+                old_list.erase(prev);
+            }
+        }
+    }
+}
+
+//Drop all state of interface vm_uuid, reporting every source port through
+//service_delete_cb_ when a callback is registered.
+void BgpAsAService::DeleteVmInterface(const boost::uuids::uuid &vm_uuid) {
+    BgpAsAServiceEntryMapIterator iter =
+       bgp_as_a_service_entry_map_.find(vm_uuid);
+    if (iter == bgp_as_a_service_entry_map_.end())
+        return;
+
+    //A missing callback must not keep the entry alive: the list would leak
+    //and lookups would keep matching a deleted interface.
+    if (service_delete_cb_) {
+        //Iterate over a copy, as before.
+        //NOTE(review): presumably guards against the callback touching the
+        //stored list - confirm.
+        BgpAsAServiceEntryList list = iter->second->list_;
+        BgpAsAServiceEntryListIterator list_iter = list.begin();
+        while (list_iter != list.end()) {
+            service_delete_cb_(vm_uuid, (*list_iter).source_port_);
+            list_iter++;
+        }
+    }
+    delete iter->second;
+    bgp_as_a_service_entry_map_.erase(iter);
+}
+
+
+//True when vm_intf has bgp-as-a-service state and either source_ip is one
+//of its configured peer addresses or dest_ip is the gateway/DNS address of
+//the IPAM matching source_ip.
+bool BgpAsAService::IsBgpService(const VmInterface *vm_intf,
+                                 const IpAddress &source_ip,
+                                 const IpAddress &dest_ip) const {
+    BgpAsAServiceEntryMapConstIterator iter =
+       bgp_as_a_service_entry_map_.find(vm_intf->GetUuid());
+    if (iter == bgp_as_a_service_entry_map_.end()) {
+        return false;
+    }
+
+    //Only the entries of this interface are relevant. Walking on to the
+    //end of the map would match peer addresses of unrelated interfaces.
+    const BgpAsAServiceEntryList &list = iter->second->list_;
+    for (BgpAsAServiceEntryListConstIterator it = list.begin();
+         it != list.end(); ++it) {
+        if (it->local_peer_ip_ == source_ip)
+            return true;
+    }
+
+    const VnEntry *vn = vm_intf->vn();
+    if (vn == NULL) return false;
+
+    return (vn->GetGatewayFromIpam(source_ip) == dest_ip) ||
+           (vn->GetDnsFromIpam(source_ip) == dest_ip);
+}
+
+//Resolve the NAT target for a flow of vm_intf: destination gateway maps to
+//controller 0, destination DNS to controller 1 (gateway is checked first).
+//*sport gets the source port of the first entry of the interface.
+//Returns false when nothing matches or the server address is unusable;
+//*nat_server may already be overwritten when address parsing fails.
+bool BgpAsAService::GetBgpRouterServiceDestination(const VmInterface *vm_intf,
+                                                   const IpAddress &source_ip,
+                                                   const IpAddress &dest,
+                                                   IpAddress *nat_server,
+                                                   uint32_t *sport) const {
+    const VnEntry *vn = vm_intf->vn();
+    if (vn == NULL) return false;
+
+    const IpAddress &gw = vn->GetGatewayFromIpam(source_ip);
+    const IpAddress &dns = vn->GetDnsFromIpam(source_ip);
+
+    boost::system::error_code ec;
+    BgpAsAServiceEntryMapConstIterator map_it =
+        bgp_as_a_service_entry_map_.find(vm_intf->GetUuid());
+    if (map_it == bgp_as_a_service_entry_map_.end()) return false;
+
+    //No configured entry means no source port to NAT with.
+    BgpAsAServiceEntryListConstIterator it = map_it->second->list_.begin();
+    if (it == map_it->second->list_.end()) return false;
+
+    uint8_t server_idx = 0;
+    if (dest == gw) {
+        server_idx = 0;
+    } else if (dest == dns) {
+        server_idx = 1;
+    } else {
+        return false;
+    }
+
+    if (agent_->controller_ifmap_xmpp_server(server_idx).empty())
+        return false;
+    *nat_server =
+        IpAddress::from_string(agent_->
+                               controller_ifmap_xmpp_server(server_idx), ec);
+    if (ec.value() != 0) {
+        std::stringstream ss;
+        ss << "Ip address parsing failed for ";
+        ss << agent_->controller_ifmap_xmpp_server(server_idx);
+        BGPASASERVICETRACE(Trace, ss.str().c_str());
+        return false;
+    }
+    *sport = it->source_port_;
+    return true;
+}
+
+////////////////////////////////////////////////////////////////////////////
+// BGP as a service routines.
+////////////////////////////////////////////////////////////////////////////
+BgpAsAService::BgpAsAServiceEntry::BgpAsAServiceEntry() :
+    VmInterface::ListEntry(),
+    local_peer_ip_(), source_port_(0) {
+}
+
+BgpAsAService::BgpAsAServiceEntry::BgpAsAServiceEntry
+(const BgpAsAService::BgpAsAServiceEntry &rhs) :
+    VmInterface::ListEntry(rhs.installed_, rhs.del_pending_),
+    local_peer_ip_(rhs.local_peer_ip_), source_port_(rhs.source_port_) {
+}
+
+BgpAsAService::BgpAsAServiceEntry::BgpAsAServiceEntry(const IpAddress &local_peer_ip,
+                                                      uint32_t source_port) :
+    VmInterface::ListEntry(),
+    local_peer_ip_(local_peer_ip),
+    source_port_(source_port) {
+}
+
+BgpAsAService::BgpAsAServiceEntry::~BgpAsAServiceEntry() {
+}
+
+bool BgpAsAService::BgpAsAServiceEntry::operator ==
+    (const BgpAsAServiceEntry &rhs) const {
+    return ((source_port_ == rhs.source_port_) &&
+            (local_peer_ip_ == rhs.local_peer_ip_));
+}
+
+//Comparator form, defers to IsLess.
+bool BgpAsAService::BgpAsAServiceEntry::operator()
+    (const BgpAsAServiceEntry &lhs, const BgpAsAServiceEntry &rhs) const {
+    return lhs.IsLess(&rhs);
+}
+
+//Order by source port first, then by peer address.
+bool BgpAsAService::BgpAsAServiceEntry::IsLess
+    (const BgpAsAServiceEntry *rhs) const {
+    if (source_port_ != rhs->source_port_)
+        return source_port_ < rhs->source_port_;
+    return local_peer_ip_ < rhs->local_peer_ip_;
+}
+
+//Introspect handler: one response row per (interface, entry) pair.
+void BgpAsAServiceSandeshReq::HandleRequest() const {
+    BgpAsAServiceSandeshResp *resp = new BgpAsAServiceSandeshResp();
+    resp->set_context(context());
+
+    Agent *agent = Agent::GetInstance();
+
+    //Read through the const reference handed out by the service, there is
+    //no need to copy the whole map just to walk it.
+    const BgpAsAService::BgpAsAServiceEntryMap &map_entry =
+        agent->oper_db()->bgp_as_a_service()->bgp_as_a_service_map();
+    BgpAsAService::BgpAsAServiceEntryMapConstIterator map_it =
+        map_entry.begin();
+    std::vector bgpaas_map;
+    while (map_it != map_entry.end()) {
+        BgpAsAService::BgpAsAServiceEntryListConstIterator it =
+            map_it->second->list_.begin();
+        while (it != map_it->second->list_.end()) {
+            BgpAsAServiceSandeshList entry;
+            entry.set_vm_bgp_peer_ip((*it).local_peer_ip_.to_string());
+            entry.set_vm_nat_source_port((*it).source_port_);
+            entry.set_vmi_uuid(UuidToString(map_it->first));
+            bgpaas_map.push_back(entry);
+            it++;
+        }
+        map_it++;
+    }
+    resp->set_bgp_as_a_service_list(bgpaas_map);
+    resp->Response();
+}
diff --git a/src/vnsw/agent/oper/bgp_as_service.h b/src/vnsw/agent/oper/bgp_as_service.h
new file mode 100644
index 00000000000..c33b63ea508
--- /dev/null
+++ b/src/vnsw/agent/oper/bgp_as_service.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2016 Juniper Networks, Inc. All rights reserved.
+ */
+
+#ifndef vnsw_agent_bgp_as_service_hpp
+#define vnsw_agent_bgp_as_service_hpp
+
+#include "oper/interface_common.h"
+
+////////////////////////////////////////////////////////////////////////////
+// BGP as a service
+//
+// Function:
+// This service enables a VM to establish a BGP session to control-node.
+// It will not try to connect to control-node directly as it is unaware of
+// it, instead it will try to connect to its gateway or DNS ip.
+// For example in subnet of 1.1.1.0/24, VM will try to connect on well defined
+// BGP port to either 1.1.1.1(=gw) or 1.1.1.2(=DNS). Agent sees this traffic and
+// creates a NAT. The calculation of NAT is done as follows:
+// Pkt from VM:
+// source: VM-SIP, destination: DIP(gw/dns), source port: VM-sport, destination
+// port: BGP-port.
+//
+// After NAT:
+// source: vrouter IP, destination: Control-node#1(if DIP was gw),
+// Control-node#2(if DIP was DNS), source port: BGP-router port,
+// destination port: BGP-port.
+//
+// This way VM is nat'd to control-node.
+// Config object(bgp-router) will provide BGP-router port used in NAT.
+// If the set of control-nodes changes, flows should use the new set given.
+//
+// What all is done here?
+//
+// 1) Reserves a set of BGP ports which can potentially be used in
+// bgp-router object.
+// This is provided via contrail-vrouter-agent.conf and agent binds on
+// to these ports, so that host does not use them.
+//
+// 2) Handles config changes.
ProcessConfig is called from VM interfaces.
+// It traverses the link from VM to bgp-as-a-service to get peer ip which VM
+// may use to peer with control-node. Note: This may be VM IP or additional
+// IP provisioned for bgp in VM. From bgp-as-a-service config of bgp-router
+// is taken and that will tell the port number used for source nat'ng VM
+// traffic to control-node. Lastly it takes VRF from bgp-router to validate
+// that bgp-router and bgp-as-a-service belong to same VRF as of VM.
+//
+// 3) Validators - Flow uses these to verify if a flow can be categorised for
+// BGP service or not. It also provides the control-node to be used for
+// nat'ng based on VM destination.
+////////////////////////////////////////////////////////////////////////////
+
+#define BGP_ROUTER_CONFIG_NAME "bgp-router"
+#define BGP_AS_SERVICE_CONFIG_NAME "bgp-as-a-service"
+
+extern SandeshTraceBufferPtr BgpAsAServiceTraceBuf;
+// No trailing semicolon after while (false): the caller supplies it, which
+// keeps "if (x) BGPASASERVICETRACE(...); else ..." well formed.
+#define BGPASASERVICETRACE(obj, ...)                                                     \
+do {                                                                                     \
+    BgpAsAService##obj::TraceMsg(BgpAsAServiceTraceBuf, __FILE__, __LINE__, __VA_ARGS__);\
+} while (false)
+
+class IFMapNode;
+class BgpAsAService {
+public:
+    static const uint32_t DefaultBgpPort = 179;
+    //Invoked with the interface uuid and the source port of an entry that
+    //went away.
+    typedef boost::function ServiceDeleteCb;
+
+    //Keep the BGP as a service data here.
+    //Is used when flow is established or when CN is updated.
+    struct BgpAsAServiceEntry : public VmInterface::ListEntry {
+        BgpAsAServiceEntry();
+        BgpAsAServiceEntry(const BgpAsAServiceEntry &rhs);
+        BgpAsAServiceEntry(const IpAddress &local_peer_ip,
+                           uint32_t source_port);
+        ~BgpAsAServiceEntry();
+        bool operator == (const BgpAsAServiceEntry &rhs) const;
+        bool operator() (const BgpAsAServiceEntry &lhs,
+                         const BgpAsAServiceEntry &rhs) const;
+        //Orders by source_port_, then by local_peer_ip_.
+        bool IsLess(const BgpAsAServiceEntry *rhs) const;
+
+        //Address the VM peers from.
+        IpAddress local_peer_ip_;
+        //Source port taken from the bgp-router config, used for the NAT.
+        uint32_t source_port_;
+    };
+    typedef std::set BgpAsAServiceEntryList;
+    typedef BgpAsAServiceEntryList::iterator BgpAsAServiceEntryListIterator;
+    typedef BgpAsAServiceEntryList::const_iterator BgpAsAServiceEntryListConstIterator;
+
+    //Entries of one interface. Insert/Update/Remove are the hooks driven
+    //by AuditList from ProcessConfig.
+    struct BgpAsAServiceList {
+        BgpAsAServiceList() : list_() { }
+        BgpAsAServiceList(const BgpAsAServiceEntryList &list) : list_(list) { }
+        ~BgpAsAServiceList() { }
+        void Insert(const BgpAsAServiceEntry *rhs);
+        void Update(const BgpAsAServiceEntry *lhs,
+                    const BgpAsAServiceEntry *rhs);
+        void Remove(BgpAsAServiceEntryListIterator &it);
+        void Flush();
+
+        BgpAsAServiceEntryList list_;
+    };
+    typedef std::map BgpAsAServiceEntryMap;
+    typedef BgpAsAServiceEntryMap::iterator BgpAsAServiceEntryMapIterator;
+    typedef BgpAsAServiceEntryMap::const_iterator BgpAsAServiceEntryMapConstIterator;
+
+    BgpAsAService(const Agent *agent);
+    ~BgpAsAService();
+
+    bool IsBgpService(const VmInterface *vm_intf,
+                      const IpAddress &source_ip,
+                      const IpAddress &dest_ip) const;
+    bool GetBgpRouterServiceDestination(const VmInterface *vm_intf,
+                                        const IpAddress &source,
+                                        const IpAddress &dest,
+                                        IpAddress *nat_server,
+                                        uint32_t *sport) const;
+    void ProcessConfig(const std::string &vrf_name,
+                       std::list &node_list,
+                       const boost::uuids::uuid &vmi_uuid);
+    void DeleteVmInterface(const boost::uuids::uuid &vmi_uuid);
+    const BgpAsAService::BgpAsAServiceEntryMap &bgp_as_a_service_map() const;
+    void RegisterServiceDeleteCb(ServiceDeleteCb callback) {
+        service_delete_cb_ = callback;
+    }
+
+private:
+    void BindBgpAsAServicePorts(const std::string &port_range);
+    void BuildBgpAsAServiceInfo(IFMapNode *bgp_as_a_service_node,
+                                BgpAsAServiceEntryList &new_list,
+                                const std::string &vrf_name);
+
+    const Agent *agent_;
+    //Keyed by interface uuid.
+    BgpAsAServiceEntryMap bgp_as_a_service_entry_map_;
+    ServiceDeleteCb service_delete_cb_;
+    DISALLOW_COPY_AND_ASSIGN(BgpAsAService);
+};
+#endif
diff --git a/src/vnsw/agent/oper/ifmap_dependency_manager.cc b/src/vnsw/agent/oper/ifmap_dependency_manager.cc
index 385afa313de..14e7ef83a8c 100644
--- a/src/vnsw/agent/oper/ifmap_dependency_manager.cc
+++ b/src/vnsw/agent/oper/ifmap_dependency_manager.cc
@@ -71,6 +71,8 @@ IFMapDependencyManager::~IFMapDependencyManager() {
 void IFMapDependencyManager::Initialize(Agent *agent) {
     static const char *ifmap_types[] = {
         "access-control-list",
+        "bgp-as-a-service",
+        "bgp-router",
         "floating-ip",
         "floating-ip-pool",
         "instance-ip",
@@ -657,6 +659,13 @@ void IFMapDependencyManager::InitializeDependencyRules(Agent *agent) {
                                "physical-interface", false,
                                "physical-router-physical-interface",
                                "physical-router", true));
+    AddDependencyPath("virtual-machine-interface",
+                      MakePath("bgpaas-virtual-machine-interface",
+                               "bgp-as-a-service", true,
+                               "bgpaas-bgp-router",
+                               "bgp-router", true,
+                               "instance-bgp-router",
+                               "routing-instance", true));
     RegisterConfigHandler(this, "virtual-machine-interface",
                           agent ?
agent->interface_table() : NULL); //////////////////////////////////////////////////////////////////////// diff --git a/src/vnsw/agent/oper/operdb_init.cc b/src/vnsw/agent/oper/operdb_init.cc index cb7b085fafa..3bbcdca9434 100644 --- a/src/vnsw/agent/oper/operdb_init.cc +++ b/src/vnsw/agent/oper/operdb_init.cc @@ -41,6 +41,7 @@ #include #include #include +#include #include using boost::assign::map_list_of; @@ -209,6 +210,7 @@ void OperDB::CreateDBTables(DB *db) { agent_->set_physical_device_vn_table(dev_vn_table); profile_.reset(new AgentProfile(agent_, true)); vrouter_ = std::auto_ptr (new VRouter(this)); + bgp_as_a_service_ = std::auto_ptr(new BgpAsAService(agent_)); } void OperDB::Init() { diff --git a/src/vnsw/agent/oper/operdb_init.h b/src/vnsw/agent/oper/operdb_init.h index dda972cf973..8819974a000 100644 --- a/src/vnsw/agent/oper/operdb_init.h +++ b/src/vnsw/agent/oper/operdb_init.h @@ -21,6 +21,7 @@ class NexthopManager; class AgentSandeshManager; class AgentProfile; class VRouter; +class BgpAsAService; class OperDB { public: @@ -56,6 +57,7 @@ class OperDB { return agent_sandesh_manager_.get(); } VRouter *vrouter() const { return vrouter_.get(); } + BgpAsAService *bgp_as_a_service() const { return bgp_as_a_service_.get(); } AgentProfile *agent_profile() const { return profile_.get(); } private: @@ -72,6 +74,7 @@ class OperDB { std::auto_ptr agent_sandesh_manager_; std::auto_ptr profile_; std::auto_ptr vrouter_; + std::auto_ptr bgp_as_a_service_; DISALLOW_COPY_AND_ASSIGN(OperDB); }; #endif diff --git a/src/vnsw/agent/oper/vm_interface.cc b/src/vnsw/agent/oper/vm_interface.cc index 45329e4e4ec..e8ab8b8ef0c 100644 --- a/src/vnsw/agent/oper/vm_interface.cc +++ b/src/vnsw/agent/oper/vm_interface.cc @@ -37,10 +37,11 @@ #include #include -#include #include #include #include +#include +#include #include "sandesh/sandesh_trace.h" #include "sandesh/common/vns_types.h" #include "sandesh/common/vns_constants.h" @@ -120,49 +121,6 @@ bool VmInterface::CmpInterface(const 
DBEntry &rhs) const { return uuid_ < intf.uuid_; } -///////////////////////////////////////////////////////////////////////////// -// Template function to audit two lists. This is used to synchronize the -// operational and config list for Floating-IP, Service-Vlans, Static Routes -// and SG List -///////////////////////////////////////////////////////////////////////////// -template -bool AuditList(List &list, Iterator old_first, Iterator old_last, - Iterator new_first, Iterator new_last) { - bool ret = false; - Iterator old_iterator = old_first; - Iterator new_iterator = new_first; - while (old_iterator != old_last && new_iterator != new_last) { - if (old_iterator->IsLess(new_iterator.operator->())) { - Iterator bkp = old_iterator++; - list.Remove(bkp); - ret = true; - } else if (new_iterator->IsLess(old_iterator.operator->())) { - Iterator bkp = new_iterator++; - list.Insert(bkp.operator->()); - ret = true; - } else { - Iterator old_bkp = old_iterator++; - Iterator new_bkp = new_iterator++; - list.Update(old_bkp.operator->(), new_bkp.operator->()); - ret = true; - } - } - - while (old_iterator != old_last) { - Iterator bkp = old_iterator++; - list.Remove(bkp); - ret = true; - } - - while (new_iterator != new_last) { - Iterator bkp = new_iterator++; - list.Insert(bkp.operator->()); - ret = true; - } - - return ret; -} - // Build one Floating IP entry for a virtual-machine-interface static void BuildFloatingIpList(Agent *agent, VmInterfaceConfigData *data, IFMapNode *node) { @@ -914,6 +872,7 @@ bool InterfaceTable::VmiProcessConfig(IFMapNode *node, DBRequest &req, IFMapNode *vn_node = NULL; IFMapNode *li_node = NULL; IFMapNode *parent_vmi_node = NULL; + std::list bgp_as_a_service_node_list; for (DBGraphVertex::adjacency_iterator iter = node->begin(table->GetGraph()); iter != node->end(table->GetGraph()); ++iter) { @@ -971,8 +930,14 @@ bool InterfaceTable::VmiProcessConfig(IFMapNode *node, DBRequest &req, if (adj_node->table() == 
agent_->cfg()->cfg_vm_interface_table()) { parent_vmi_node = adj_node; } + + if (strcmp(adj_node->table()->Typename(), BGP_AS_SERVICE_CONFIG_NAME) == 0) { + bgp_as_a_service_node_list.push_back(adj_node); + } } + agent_->oper_db()->bgp_as_a_service()->ProcessConfig(data->vrf_name_, + bgp_as_a_service_node_list, u); UpdateAttributes(agent_, data); BuildFatFlowTable(agent_, data, node); @@ -1019,6 +984,7 @@ bool InterfaceTable::VmiIFNodeToReq(IFMapNode *node, DBRequest &req, // Handle object delete if ((req.oper == DBRequest::DB_ENTRY_DELETE) || node->IsDeleted()) { + agent_->oper_db()->bgp_as_a_service()->DeleteVmInterface(u); DelPhysicalDeviceVnEntry(u); return DeleteVmi(this, u, &req); } @@ -1420,7 +1386,6 @@ void VmInterface::ApplyConfigCommon(const VrfEntry *old_vrf, DeleteSecurityGroup(); DeleteFatFlow(); } - } void VmInterface::ApplyMacVmBindingConfig(const VrfEntry *old_vrf, @@ -1899,7 +1864,6 @@ bool VmInterface::CopyConfig(const InterfaceTable *table, logical_interface_ = data->logical_interface_; ret = true; } - Interface *new_parent = NULL; if (data->physical_interface_.empty() == false) { PhysicalInterfaceKey key(data->physical_interface_); diff --git a/src/vnsw/agent/oper/vm_interface.h b/src/vnsw/agent/oper/vm_interface.h index d8ae7d68a65..81459a03ddb 100644 --- a/src/vnsw/agent/oper/vm_interface.h +++ b/src/vnsw/agent/oper/vm_interface.h @@ -6,6 +6,7 @@ #define vnsw_agent_vm_interface_hpp #include +#include ///////////////////////////////////////////////////////////////////////////// // Implementation of VM Port interfaces @@ -24,6 +25,7 @@ class MetaDataIp; class HealthCheckInstance; class LocalVmPortPeer; + ///////////////////////////////////////////////////////////////////////////// // Definition for VmInterface // Agent supports multiple type of VMInterfaces diff --git a/src/vnsw/agent/oper/vn.cc b/src/vnsw/agent/oper/vn.cc index 9e0f7c4dc4a..17cff8d1c40 100644 --- a/src/vnsw/agent/oper/vn.cc +++ b/src/vnsw/agent/oper/vn.cc @@ -167,6 +167,22 @@ 
const VnIpam *VnEntry::GetIpam(const IpAddress &ip) const { return NULL; } +IpAddress VnEntry::GetGatewayFromIpam(const IpAddress &ip) const { + const VnIpam *ipam = GetIpam(ip); + if (ipam) { + return ipam->default_gw; + } + return IpAddress(); +} + +IpAddress VnEntry::GetDnsFromIpam(const IpAddress &ip) const { + const VnIpam *ipam = GetIpam(ip); + if (ipam) { + return ipam->dns_server; + } + return IpAddress(); +} + bool VnEntry::GetIpamVdnsData(const IpAddress &vm_addr, autogen::IpamType *ipam_type, autogen::VirtualDnsType *vdns_type) const { diff --git a/src/vnsw/agent/oper/vn.h b/src/vnsw/agent/oper/vn.h index 4eadeff0d18..c821fbdd795 100644 --- a/src/vnsw/agent/oper/vn.h +++ b/src/vnsw/agent/oper/vn.h @@ -142,6 +142,8 @@ class VnEntry : AgentRefCount, public AgentOperDBEntry { VrfEntry *GetVrf() const {return vrf_.get();}; const std::vector &GetVnIpam() const { return ipam_; }; const VnIpam *GetIpam(const IpAddress &ip) const; + IpAddress GetGatewayFromIpam(const IpAddress &ip) const; + IpAddress GetDnsFromIpam(const IpAddress &ip) const; bool GetVnHostRoutes(const std::string &ipam, std::vector *routes) const; bool GetIpamName(const IpAddress &vm_addr, std::string *ipam_name) const; @@ -179,6 +181,8 @@ class VnEntry : AgentRefCount, public AgentOperDBEntry { bool DBEntrySandesh(Sandesh *sresp, std::string &name) const; void SendObjectLog(AgentLogEvent::type event) const; void ResyncRoutes(); + bool IdentifyBgpRoutersServiceIp(const IpAddress &ip_address, + bool *is_dns, bool *is_gateway) const; private: friend class VnTable; diff --git a/src/vnsw/agent/pkt/flow_entry.cc b/src/vnsw/agent/pkt/flow_entry.cc index 229becee48f..ff3bc7e7c22 100644 --- a/src/vnsw/agent/pkt/flow_entry.cc +++ b/src/vnsw/agent/pkt/flow_entry.cc @@ -58,7 +58,8 @@ const std::map (DEFAULT_GW_ICMP_OR_DNS, "00000000-0000-0000-0000-000000000003") (LINKLOCAL_FLOW, "00000000-0000-0000-0000-000000000004") (MULTICAST_FLOW, "00000000-0000-0000-0000-000000000005") - (NON_IP_FLOW, 
"00000000-0000-0000-0000-000000000006"); + (NON_IP_FLOW, "00000000-0000-0000-0000-000000000006") + (BGPROUTERSERVICE_FLOW, "00000000-0000-0000-0000-000000000007"); const std::map FlowEntry::FlowDropReasonStr = boost::assign::map_list_of @@ -137,6 +138,7 @@ void FlowData::Reset() { enable_rpf = true; l2_rpf_plen = Address::kMaxV4PrefixLen; vm_cfg_name = ""; + bgp_as_a_service_port = 0; } ///////////////////////////////////////////////////////////////////////////// @@ -320,6 +322,13 @@ bool FlowEntry::InitFlowCmn(const PktFlowInfo *info, const PktControlInfo *ctrl, } else { reset_flags(FlowEntry::TcpAckFlow); } + if (info->bgp_router_service_flow) { + set_flags(FlowEntry::BgpRouterService); + data_.bgp_as_a_service_port = info->nat_sport; + } else { + reset_flags(FlowEntry::BgpRouterService); + data_.bgp_as_a_service_port = 0; + } data_.intf_entry = ctrl->intf_ ? ctrl->intf_ : rev_ctrl->intf_; data_.vn_entry = ctrl->vn_ ? ctrl->vn_ : rev_ctrl->vn_; @@ -826,9 +835,11 @@ void FlowEntry::GetPolicy(const VnEntry *vn, const FlowEntry *rflow) { data_.match_p.m_mirror_acl_l.push_back(acl); } - // Dont apply network-policy for linklocal and subnet broadcast flow + // Dont apply network-policy for linklocal, bgp router service + // and subnet broadcast flow if (is_flags_set(FlowEntry::LinkLocalFlow) || - is_flags_set(FlowEntry::Multicast)) { + is_flags_set(FlowEntry::Multicast) || + is_flags_set(FlowEntry::BgpRouterService)) { return; } @@ -1092,6 +1103,8 @@ uint32_t FlowEntry::MatchAcl(const PacketHeader &hdr, info->uuid = FlowPolicyStateStr.at(LINKLOCAL_FLOW); } else if (is_flags_set(FlowEntry::Multicast)) { info->uuid = FlowPolicyStateStr.at(MULTICAST_FLOW); + } else if (is_flags_set(FlowEntry::BgpRouterService)) { + info->uuid = FlowPolicyStateStr.at(BGPROUTERSERVICE_FLOW); } else { /* We need to make sure that info is not already populated * before setting it to IMPLICIT_ALLOW. 
This is required diff --git a/src/vnsw/agent/pkt/flow_entry.h b/src/vnsw/agent/pkt/flow_entry.h index 52c333a7d20..46ed44e97af 100644 --- a/src/vnsw/agent/pkt/flow_entry.h +++ b/src/vnsw/agent/pkt/flow_entry.h @@ -207,6 +207,7 @@ struct FlowData { uint32_t mirror_vrf; uint32_t dest_vrf; uint32_t component_nh_idx; + uint32_t bgp_as_a_service_port; // Stats uint8_t source_plen; @@ -270,6 +271,7 @@ class FlowEntry { DEFAULT_GW_ICMP_OR_DNS, /* DNS/ICMP pkt to/from default gateway */ LINKLOCAL_FLOW, /* No policy applied for linklocal flow */ MULTICAST_FLOW, /* No policy applied for multicast flow */ + BGPROUTERSERVICE_FLOW, /* No policy applied for bgp router service flow */ NON_IP_FLOW, /* Flow due to bridging */ }; @@ -300,7 +302,8 @@ class FlowEntry { // a local port bind is done (used as as src port for linklocal nat) LinkLocalBindLocalSrcPort = 1 << 9, TcpAckFlow = 1 << 10, - UnknownUnicastFlood = 1 << 11 + UnknownUnicastFlood = 1 << 11, + BgpRouterService = 1 << 12, }; FlowEntry(FlowTable *flow_table); @@ -385,6 +388,11 @@ class FlowEntry { const VmEntry *in_vm_entry() const { return data_.in_vm_entry.get(); } const VmEntry *out_vm_entry() const { return data_.out_vm_entry.get(); } const NextHop *nh() const { return data_.nh.get(); } + const uint32_t bgp_as_a_service_port() const { + if (is_flags_set(FlowEntry::BgpRouterService)) + return data_.bgp_as_a_service_port; + return 0; + } const MatchPolicy &match_p() const { return data_.match_p; } bool ImplicitDenyFlow() const { diff --git a/src/vnsw/agent/pkt/flow_mgmt.cc b/src/vnsw/agent/pkt/flow_mgmt.cc index 7a0dcdbe3d0..2716b9330fa 100644 --- a/src/vnsw/agent/pkt/flow_mgmt.cc +++ b/src/vnsw/agent/pkt/flow_mgmt.cc @@ -1,4 +1,8 @@ #include +#include +#include "cmn/agent.h" +#include "controller/controller_init.h" +#include "oper/bgp_as_service.h" #include "pkt/flow_proto.h" #include "pkt/flow_mgmt.h" #include "pkt/flow_mgmt_request.h" @@ -23,6 +27,10 @@ FlowMgmtManager::FlowMgmtManager(Agent *agent) : 
request_queue_(agent_->task_scheduler()->GetTaskId(kFlowMgmtTask), 1, boost::bind(&FlowMgmtManager::RequestHandler, this, _1)) { request_queue_.set_name("Flow management"); + for (uint8_t count = 0; count < MAX_XMPP_SERVERS; count++) { + bgp_as_a_service_flow_mgmt_tree_[count].reset( + new BgpAsAServiceFlowMgmtTree(this)); + } } void FlowMgmtManager::Init() { @@ -32,6 +40,12 @@ void FlowMgmtManager::Init() { agent_->acl_table()->set_acl_flow_sandesh_data_cb (boost::bind(&FlowMgmtManager::SetAclFlowSandeshData, this, _1, _2, _3)); + // If BGP service is deleted then flush off all the flows for the VMI. + agent_->oper_db()->bgp_as_a_service()->RegisterServiceDeleteCb(boost::bind + (&FlowMgmtManager::BgpAsAServiceNotify, this, _1, _2)); + // If a control node goes down, delete all flows from its tree. + agent_->controller()->RegisterControllerChangeCallback(boost::bind + (&FlowMgmtManager::ControllerNotify, this, _1)); } void FlowMgmtManager::Shutdown() { @@ -39,6 +53,23 @@ void FlowMgmtManager::Shutdown() { flow_mgmt_dbclient_->Shutdown(); } +///////////////////////////////////////////////////////////////////////////// +// BGP as a service callbacks +///////////////////////////////////////////////////////////////////////////// +void FlowMgmtManager::BgpAsAServiceNotify(const boost::uuids::uuid &vm_uuid, + uint32_t source_port) { + boost::shared_ptrreq + (new BgpAsAServiceFlowMgmtRequest(vm_uuid, source_port)); + request_queue_.Enqueue(req); +} + +void FlowMgmtManager::ControllerNotify(uint8_t index) { + boost::shared_ptrreq + (new BgpAsAServiceFlowMgmtRequest(index)); + request_queue_.Enqueue(req); +} + +///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// // Introspect routines ///////////////////////////////////////////////////////////////////////////// @@ -200,6 +231,92 @@ bool FlowMgmtManager::DBEntryRequestHandler(FlowMgmtRequest *req, return true; } 
+///////////////////////////////////////////////////////////////////////////// +// Bgp as a service flow management +///////////////////////////////////////////////////////////////////////////// +bool BgpAsAServiceFlowMgmtEntry::NonOperEntryDelete(FlowMgmtManager *mgr, + const FlowMgmtRequest *req, + FlowMgmtKey *key) { + oper_state_ = OPER_DEL_SEEN; + gen_id_ = req->gen_id(); + FlowEvent::Event event = req->GetResponseEvent(); + if (event == FlowEvent::INVALID) + return false; + + Tree::iterator it = tree_.begin(); + while (it != tree_.end()) { + FlowEvent flow_resp(event, (*it)->key(), true); + flow_resp.set_flow(*it); + mgr->EnqueueFlowEvent(flow_resp); + it++; + } + return true; +} + +void BgpAsAServiceFlowMgmtTree::FreeNotify(FlowMgmtKey *key, uint32_t gen_id) { + assert(key->db_entry() == NULL); +} + +void BgpAsAServiceFlowMgmtTree::ExtractKeys(FlowEntry *flow, + FlowMgmtKeyTree *tree) { + if (flow->is_flags_set(FlowEntry::BgpRouterService) == false) + return; + const VmInterface *vm_intf = + dynamic_cast(flow->intf_entry()); + if (!vm_intf || (flow->bgp_as_a_service_port() == 0)) + return; + + BgpAsAServiceFlowMgmtKey *key = + new BgpAsAServiceFlowMgmtKey(vm_intf->GetUuid(), + flow->bgp_as_a_service_port()); + AddFlowMgmtKey(tree, key); +} + +FlowMgmtEntry *BgpAsAServiceFlowMgmtTree::Allocate(const FlowMgmtKey *key) { + return new BgpAsAServiceFlowMgmtEntry(); +} + +bool BgpAsAServiceFlowMgmtTree::BgpAsAServiceDelete +(BgpAsAServiceFlowMgmtKey &key, const FlowMgmtRequest *req) { + FlowMgmtEntry *entry = Find(&key); + if (entry == NULL) { + return true; + } + + entry->NonOperEntryDelete(mgr_, req, &key); + return TryDelete(&key, entry); +} + +void BgpAsAServiceFlowMgmtTree::DeleteAll() { + Tree::iterator it = tree_.begin(); + while (it != tree_.end()) { + BgpAsAServiceFlowMgmtKey *key = + static_cast(it->first); + mgr_->BgpAsAServiceNotify(key->uuid(), key->source_port()); + // Advance to the next key; without this the loop never terminates. + it++; + } +} + +bool +FlowMgmtManager::BgpAsAServiceRequestHandler(FlowMgmtRequest *req) { + 
+ BgpAsAServiceFlowMgmtRequest *bgp_as_a_service_request = + dynamic_cast(req); + if (bgp_as_a_service_request->type() == BgpAsAServiceFlowMgmtRequest::VMI) { + BgpAsAServiceFlowMgmtKey key(bgp_as_a_service_request->vm_uuid(), + bgp_as_a_service_request->source_port()); + //Delete it for all CN trees + for (uint8_t count = 0; count < MAX_XMPP_SERVERS; count++) { + bgp_as_a_service_flow_mgmt_tree_[count].get()-> + BgpAsAServiceDelete(key, req); + } + } else if (bgp_as_a_service_request->type() == + BgpAsAServiceFlowMgmtRequest::CONTROLLER) { + bgp_as_a_service_flow_mgmt_tree_[bgp_as_a_service_request->index()].get()-> + DeleteAll(); + } + return true; +} + bool FlowMgmtManager::RequestHandler(boost::shared_ptr req) { switch (req->event()) { case FlowMgmtRequest::ADD_FLOW: { @@ -240,6 +357,11 @@ bool FlowMgmtManager::RequestHandler(boost::shared_ptr req) { break; } + case FlowMgmtRequest::DELETE_BGP_AAS_FLOWS: { + BgpAsAServiceRequestHandler(req.get()); + break; + } + default: assert(0); @@ -270,6 +392,10 @@ void FlowMgmtManager::MakeFlowMgmtKeyTree(FlowEntry *flow, ip6_route_flow_mgmt_tree_.ExtractKeys(flow, tree); bridge_route_flow_mgmt_tree_.ExtractKeys(flow, tree); nh_flow_mgmt_tree_.ExtractKeys(flow, tree); + for (uint8_t count = 0; count < MAX_XMPP_SERVERS; count++) { + bgp_as_a_service_flow_mgmt_tree_[count].get()-> + ExtractKeys(flow, tree); + } } void FlowMgmtManager::AddFlow(FlowEntryPtr &flow) { @@ -478,6 +604,11 @@ void FlowMgmtManager::AddFlowMgmtKey(FlowEntry *flow, FlowEntryInfo *info, nh_flow_mgmt_tree_.Add(key, flow); break; + case FlowMgmtKey::BGPASASERVICE: + for (uint8_t count = 0; count < MAX_XMPP_SERVERS; count++) + bgp_as_a_service_flow_mgmt_tree_[count].get()->Add(key, flow); + break; + default: assert(0); } @@ -526,6 +657,11 @@ void FlowMgmtManager::DeleteFlowMgmtKey(FlowEntry *flow, FlowEntryInfo *info, nh_flow_mgmt_tree_.Delete(key, flow); break; + case FlowMgmtKey::BGPASASERVICE: + for (uint8_t count = 0; count < MAX_XMPP_SERVERS; 
count++) + bgp_as_a_service_flow_mgmt_tree_[count].get()->Delete(key, flow); + break; + default: assert(0); } @@ -554,6 +690,8 @@ FlowEvent::Event FlowMgmtKey::FreeDBEntryEvent() const { case VM: event = FlowEvent::INVALID; break; + case BGPASASERVICE: + event = FlowEvent::INVALID; + // Must not fall through into the asserting default case. + break; default: assert(0); @@ -1249,7 +1387,7 @@ void BridgeRouteFlowMgmtTree::ExtractKeys(FlowEntry *flow, } FlowMgmtEntry *BridgeRouteFlowMgmtTree::Allocate(const FlowMgmtKey *key) { - return new BridgeRouteFlowMgmtEntry(); + return new BridgeRouteFlowMgmtEntry(); } bool BridgeRouteFlowMgmtTree::HasVrfFlows(uint32_t vrf, diff --git a/src/vnsw/agent/pkt/flow_mgmt.h b/src/vnsw/agent/pkt/flow_mgmt.h index 544b74f4a3c..9b1130e279a 100644 --- a/src/vnsw/agent/pkt/flow_mgmt.h +++ b/src/vnsw/agent/pkt/flow_mgmt.h @@ -4,6 +4,7 @@ #ifndef __AGENT_FLOW_TABLE_MGMT_H__ #define __AGENT_FLOW_TABLE_MGMT_H__ +#include #include "pkt/flow_table.h" #include "pkt/flow_mgmt_request.h" #include "pkt/flow_event.h" @@ -235,6 +236,10 @@ class FlowMgmtDbClient; // for the VRF. But is used to ensure VRF entry is // not deleted till all route-entries are freed and // DELETE event for VRF is processed +// - BgpAsAServiceFlowMgmtTree : FlowMgmtTree per control-node. This is +// maintained per control node because VMI can +// establish a bgp peer session with each control +// node. //////////////////////////////////////////////////////////////////////////// class FlowMgmtKey { public: @@ -250,6 +255,7 @@ class FlowMgmtKey { BRIDGE, NH, VRF, + BGPASASERVICE, END }; @@ -338,6 +344,10 @@ class FlowMgmtEntry { // Handle Delete event for DBEntry virtual bool OperEntryDelete(FlowMgmtManager *mgr, const FlowMgmtRequest *req, FlowMgmtKey *key); + // Handle Delete event for Non-DBEntry + virtual bool NonOperEntryDelete(FlowMgmtManager *mgr, + const FlowMgmtRequest *req, + FlowMgmtKey *key) { return true; } // Can the entry be deleted? 
virtual bool CanDelete() const; @@ -908,6 +918,65 @@ class VrfFlowMgmtTree : public FlowMgmtTree { DISALLOW_COPY_AND_ASSIGN(VrfFlowMgmtTree); }; +//////////////////////////////////////////////////////////////////////////// +// Flow Management tree for bgp as a service. +//////////////////////////////////////////////////////////////////////////// +class BgpAsAServiceFlowMgmtKey : public FlowMgmtKey { +public: + BgpAsAServiceFlowMgmtKey(const boost::uuids::uuid &uuid, + uint32_t source_port) : + FlowMgmtKey(FlowMgmtKey::BGPASASERVICE, NULL), uuid_(uuid), + source_port_(source_port) { } + virtual ~BgpAsAServiceFlowMgmtKey() { } + virtual FlowMgmtKey *Clone() { + return new BgpAsAServiceFlowMgmtKey(uuid_, source_port_); + } + virtual bool UseDBEntry() const { return false; } + virtual bool Compare(const FlowMgmtKey *rhs) const { + const BgpAsAServiceFlowMgmtKey *rhs_key = + static_cast(rhs); + if (uuid_ != rhs_key->uuid_) + return uuid_ < rhs_key->uuid_; + return source_port_ < rhs_key->source_port_; + } + const boost::uuids::uuid &uuid() const { return uuid_; } + uint32_t source_port() const { return source_port_; } + +private: + boost::uuids::uuid uuid_; + uint32_t source_port_; + DISALLOW_COPY_AND_ASSIGN(BgpAsAServiceFlowMgmtKey); +}; + +class BgpAsAServiceFlowMgmtEntry : public FlowMgmtEntry { +public: + BgpAsAServiceFlowMgmtEntry() : FlowMgmtEntry() { } + virtual ~BgpAsAServiceFlowMgmtEntry() { } + virtual bool NonOperEntryDelete(FlowMgmtManager *mgr, + const FlowMgmtRequest *req, + FlowMgmtKey *key); + +private: + DISALLOW_COPY_AND_ASSIGN(BgpAsAServiceFlowMgmtEntry); +}; + +class BgpAsAServiceFlowMgmtTree : public FlowMgmtTree { +public: + BgpAsAServiceFlowMgmtTree(FlowMgmtManager *mgr) : FlowMgmtTree(mgr) {} + virtual ~BgpAsAServiceFlowMgmtTree() {} + + void ExtractKeys(FlowEntry *flow, FlowMgmtKeyTree *tree); + FlowMgmtEntry *Allocate(const FlowMgmtKey *key); + bool BgpAsAServiceDelete(BgpAsAServiceFlowMgmtKey &key, + const FlowMgmtRequest *req); + void 
DeleteAll(); + // Called just before entry is deleted. Used to implement cleanup operations + virtual void FreeNotify(FlowMgmtKey *key, uint32_t gen_id); +private: + DISALLOW_COPY_AND_ASSIGN(BgpAsAServiceFlowMgmtTree); +}; + + class FlowMgmtManager { public: static const std::string kFlowMgmtTask; @@ -945,7 +1014,7 @@ class FlowMgmtManager { bool DBEntryRequestHandler(FlowMgmtRequest *req, const DBEntry *entry); bool RequestHandler(boost::shared_ptr req); - + bool BgpAsAServiceRequestHandler(FlowMgmtRequest *req); bool DbClientHandler(const DBEntry *entry); void EnqueueFlowEvent(const FlowEvent &event); @@ -969,6 +1038,8 @@ class FlowMgmtManager { } void DisableWorkQueue(bool disable) { request_queue_.set_disable(disable); } + void BgpAsAServiceNotify(const boost::uuids::uuid &vm_uuid, + uint32_t source_port); private: // Handle Add/Change of a flow. Builds FlowMgmtKeyTree for all objects void AddFlow(FlowEntryPtr &flow); @@ -993,6 +1064,7 @@ class FlowMgmtManager { int ace_id); void SetAclFlowSandeshData(const AclDBEntry *acl, AclFlowResp &data, const int last_count); + void ControllerNotify(uint8_t index); Agent *agent_; AclFlowMgmtTree acl_flow_mgmt_tree_; @@ -1004,6 +1076,7 @@ class FlowMgmtManager { VrfFlowMgmtTree vrf_flow_mgmt_tree_; NhFlowMgmtTree nh_flow_mgmt_tree_; FlowEntryTree flow_tree_; + boost::scoped_ptr bgp_as_a_service_flow_mgmt_tree_[MAX_XMPP_SERVERS]; std::auto_ptr flow_mgmt_dbclient_; WorkQueue > request_queue_; DISALLOW_COPY_AND_ASSIGN(FlowMgmtManager); diff --git a/src/vnsw/agent/pkt/flow_mgmt_request.h b/src/vnsw/agent/pkt/flow_mgmt_request.h index d803fc57ce6..c54a073ab81 100644 --- a/src/vnsw/agent/pkt/flow_mgmt_request.h +++ b/src/vnsw/agent/pkt/flow_mgmt_request.h @@ -20,8 +20,8 @@ class FlowMgmtRequest { CHANGE_DBENTRY, DELETE_DBENTRY, RETRY_DELETE_VRF, - UPDATE_FLOW_INDEX - + UPDATE_FLOW_INDEX, + DELETE_BGP_AAS_FLOWS }; FlowMgmtRequest(Event event, FlowEntryPtr &flow) : @@ -47,6 +47,9 @@ class FlowMgmtRequest { // response. 
Returns INVALID if no message to be enqueued FlowEvent::Event GetResponseEvent() const { FlowEvent::Event resp_event = FlowEvent::INVALID; + if (event_ == DELETE_BGP_AAS_FLOWS) + return FlowEvent::DELETE_FLOW; + if (db_entry_ == NULL) return resp_event; @@ -92,4 +95,33 @@ class FlowMgmtRequest { DISALLOW_COPY_AND_ASSIGN(FlowMgmtRequest); }; +class BgpAsAServiceFlowMgmtRequest : public FlowMgmtRequest { +public: + enum Type { + VMI, + CONTROLLER + }; + + BgpAsAServiceFlowMgmtRequest(uint8_t index) : + FlowMgmtRequest(FlowMgmtRequest::DELETE_BGP_AAS_FLOWS, NULL, 0), + type_(BgpAsAServiceFlowMgmtRequest::CONTROLLER), vm_uuid_(), + source_port_(), index_(index) { } + BgpAsAServiceFlowMgmtRequest(boost::uuids::uuid vm_uuid, + uint32_t source_port) : + FlowMgmtRequest(FlowMgmtRequest::DELETE_BGP_AAS_FLOWS, NULL, 0), + type_(BgpAsAServiceFlowMgmtRequest::VMI), vm_uuid_(vm_uuid), + source_port_(source_port), index_() { } + virtual ~BgpAsAServiceFlowMgmtRequest() { } + BgpAsAServiceFlowMgmtRequest::Type type() const { return type_; } + const boost::uuids::uuid &vm_uuid() const { return vm_uuid_; } + uint32_t source_port() const { return source_port_; } + uint8_t index() const { return index_; } + +private: + BgpAsAServiceFlowMgmtRequest::Type type_; + boost::uuids::uuid vm_uuid_; + uint32_t source_port_; + uint8_t index_; + DISALLOW_COPY_AND_ASSIGN(BgpAsAServiceFlowMgmtRequest); +}; #endif // __AGENT_FLOW_MGMT_REQUEST_H__ diff --git a/src/vnsw/agent/pkt/flow_table.cc b/src/vnsw/agent/pkt/flow_table.cc index 91456c5f4d2..16b2db875d0 100644 --- a/src/vnsw/agent/pkt/flow_table.cc +++ b/src/vnsw/agent/pkt/flow_table.cc @@ -350,6 +350,19 @@ void FlowTable::UpdateReverseFlow(FlowEntry *flow, FlowEntry *rflow) { } } + if (rflow && rflow->is_flags_set(FlowEntry::BgpRouterService)) { + //In BGP router service for some reason if tcp connection does not + //succeed, then client will try again with new source port and this will + //create a new flow. 
Now there will be two flows - one with old source + //port and other with new source port. However both of them will have + //same reverse flow as its is nat'd with fabric sip/dip. + //To avoid this delete old flow and dont let new flow to be short flow. + if (rflow_rev) { + Delete(rflow_rev->key(), false); + rflow_rev = NULL; + } + } + if (rflow_rev && (rflow_rev->reverse_flow_entry() == NULL)) { rflow_rev->MakeShortFlow(FlowEntry::SHORT_NO_REVERSE_FLOW); if (ValidFlowMove(flow, rflow_rev) == false) { diff --git a/src/vnsw/agent/pkt/pkt_flow_info.cc b/src/vnsw/agent/pkt/pkt_flow_info.cc index 6b8aa1ab2d3..fafbf7de037 100644 --- a/src/vnsw/agent/pkt/pkt_flow_info.cc +++ b/src/vnsw/agent/pkt/pkt_flow_info.cc @@ -22,6 +22,7 @@ #include "oper/global_vrouter.h" #include "oper/operdb_init.h" #include "oper/tunnel_nh.h" +#include "oper/bgp_as_service.h" #include "filter/packet_header.h" #include "filter/acl.h" @@ -418,7 +419,13 @@ static bool IntfHasFloatingIp(PktFlowInfo *pkt_info, const Interface *intf, return static_cast(intf)->HasFloatingIp(family); } -static bool IsLinkLocalRoute(Agent *agent, const AgentRoute *rt) { +static bool IsLinkLocalRoute(Agent *agent, const AgentRoute *rt, + uint32_t sport, uint32_t dport) { + //Local CN and BGP has been allowed for testing purpose. 
+ if ((sport == BgpAsAService::DefaultBgpPort) || + (dport == BgpAsAService::DefaultBgpPort)) + return false; + const AgentPath *path = rt->GetActivePath(); if (path && path->peer() == agent->link_local_peer()) return true; @@ -426,6 +433,40 @@ static bool IsLinkLocalRoute(Agent *agent, const AgentRoute *rt) { return false; } +bool PktFlowInfo::IsBgpRouterServiceRoute(const AgentRoute *in_rt, + const AgentRoute *out_rt, + const Interface *intf, + uint32_t sport, + uint32_t dport) { + if (bgp_router_service_flow) + return true; + + if (intf == NULL || in_rt == NULL || out_rt == NULL) + return false; + + if ((sport != BgpAsAService::DefaultBgpPort) && + (dport != BgpAsAService::DefaultBgpPort)) + return false; + + if (intf->type() == Interface::VM_INTERFACE) { + const VmInterface *vm_intf = + dynamic_cast(intf); + const InetUnicastRouteEntry *in_inet_rt = + dynamic_cast(in_rt); + const InetUnicastRouteEntry *out_inet_rt = + dynamic_cast(out_rt); + if (in_inet_rt == NULL || out_inet_rt == NULL) + return false; + if (agent->oper_db()->bgp_as_a_service()-> + IsBgpService(vm_intf, in_inet_rt->addr(), out_inet_rt->addr())) { + bgp_router_service_flow = true; + return true; + } + } + + return false; +} + static const string *RouteToVn(const AgentRoute *rt) { const AgentPath *path = NULL; if (rt) { @@ -519,12 +560,15 @@ static void SetInEcmpIndex(const PktInfo *pkt, PktFlowInfo *flow_info, } } -static bool RouteAllowNatLookup(Agent *agent, const AgentRoute *rt) { +bool PktFlowInfo::RouteAllowNatLookupCommon(const AgentRoute *rt, + uint32_t sport, + uint32_t dport, + const Interface *intf) { // No NAT for bridge routes if (dynamic_cast(rt) != NULL) return false; - if (rt != NULL && IsLinkLocalRoute(agent, rt)) { + if (rt != NULL && IsLinkLocalRoute(agent, rt, sport, dport)) { // skip NAT lookup if found route has link local peer. 
return false; } @@ -532,6 +576,35 @@ static bool RouteAllowNatLookup(Agent *agent, const AgentRoute *rt) { return true; } +bool PktFlowInfo::IngressRouteAllowNatLookup(const AgentRoute *in_rt, + const AgentRoute *out_rt, + uint32_t sport, + uint32_t dport, + const Interface *intf) { + if (RouteAllowNatLookupCommon(out_rt, sport, dport, intf) == false) { + return false; + } + + if (IsBgpRouterServiceRoute(in_rt, out_rt, intf, sport, dport)) { + // skip NAT lookup for BGP router service flows. + return false; + } + + return true; +} + +bool PktFlowInfo::EgressRouteAllowNatLookup(const AgentRoute *in_rt, + const AgentRoute *out_rt, + uint32_t sport, + uint32_t dport, + const Interface *intf) { + if (RouteAllowNatLookupCommon(out_rt, sport, dport, intf) == false) { + return false; + } + + return true; +} + void PktFlowInfo::SetEcmpFlowInfo(const PktInfo *pkt, const PktControlInfo *in, const PktControlInfo *out) { nat_ip_daddr = pkt->ip_daddr; @@ -747,6 +820,72 @@ void PktFlowInfo::LinkLocalServiceTranslate(const PktInfo *pkt, PktControlInfo * } } +void PktFlowInfo::BgpRouterServiceFromVm(const PktInfo *pkt, PktControlInfo *in, + PktControlInfo *out) { + + // BGP router service is supported only for IPv4 for now + if (pkt->family != Address::INET) { + in->rt_ = NULL; + out->rt_ = NULL; + return; + } + + const VmInterface *vm_port = + static_cast(in->intf_); + + const VnEntry *vn = static_cast(vm_port->vn()); + uint32_t sport = 0; + IpAddress nat_server = IpAddress(); + + if (vn == NULL) { + in->rt_ = NULL; + out->rt_ = NULL; + return; + } + + if (agent->oper_db()->bgp_as_a_service()-> + GetBgpRouterServiceDestination(vm_port, + pkt->ip_saddr.to_v4(), + pkt->ip_daddr.to_v4(), + &nat_server, + &sport) == false) { + return; + } + + out->vrf_ = agent->vrf_table()->FindVrfFromName(agent->fabric_vrf_name()); + dest_vrf = out->vrf_->vrf_id(); + + nat_done = true; + //Populate NAT + nat_ip_saddr = agent->router_id(); + nat_ip_daddr = nat_server; + nat_sport = sport; + 
nat_dport = pkt->dport; + if ((nat_ip_daddr == agent->router_id()) && + (nat_ip_daddr == nat_ip_saddr)) { + boost::system::error_code ec; + //TODO may be use MDATA well known address. + nat_ip_saddr = vm_port->mdata_ip_addr(); + } + + nat_vrf = dest_vrf; + nat_dest_vrf = vm_port->vrf_id(); + + + out->rt_ = FlowEntry::GetUcRoute(out->vrf_, nat_server); + out->intf_ = agent->vhost_interface(); + out->nh_ = out->intf_->flow_key_nh()->id(); + return; +} + +void PktFlowInfo::BgpRouterServiceTranslate(const PktInfo *pkt, + PktControlInfo *in, + PktControlInfo *out) { + if (in->intf_->type() == Interface::VM_INTERFACE) { + BgpRouterServiceFromVm(pkt, in, out); + } +} + // DestNAT for packets entering into a VM with floating-ip. // Can come here in two paths, // - Packet originated on local vm. @@ -1067,7 +1206,11 @@ void PktFlowInfo::IngressProcess(const PktInfo *pkt, PktControlInfo *in, } } - if (RouteAllowNatLookup(agent, out->rt_)) { + if (IngressRouteAllowNatLookup(in->rt_, + out->rt_, + pkt->sport, + pkt->dport, + in->intf_)) { // If interface has floating IP, check if we have more specific route in // public VN (floating IP) if (IntfHasFloatingIp(this, in->intf_, pkt->family)) { @@ -1085,11 +1228,19 @@ void PktFlowInfo::IngressProcess(const PktInfo *pkt, PktControlInfo *in, } // Packets needing linklocal service will have route added by LinkLocal peer - if ((in->rt_ && IsLinkLocalRoute(agent, in->rt_)) || - (out->rt_ && IsLinkLocalRoute(agent, out->rt_))) { + if ((in->rt_ && IsLinkLocalRoute(agent, in->rt_, pkt->sport, pkt->dport)) || + (out->rt_ && IsLinkLocalRoute(agent, out->rt_, + pkt->sport, pkt->dport))) { LinkLocalServiceTranslate(pkt, in, out); } + //Packets needing bgp router service handling + if (IsBgpRouterServiceRoute(in->rt_, out->rt_, + in->intf_, pkt->sport, + pkt->dport)) { + BgpRouterServiceTranslate(pkt, in, out); + } + // If out-interface was not found, get it based on out-route if (out->intf_ == NULL && out->rt_) { RouteToOutInfo(out->rt_, pkt, 
this, in, out); @@ -1191,7 +1342,11 @@ void PktFlowInfo::EgressProcess(const PktInfo *pkt, PktControlInfo *in, return; } - if (RouteAllowNatLookup(agent, out->rt_)) { + if (EgressRouteAllowNatLookup(in->rt_, + out->rt_, + pkt->sport, + pkt->dport, + out->intf_)) { // If interface has floating IP, check if destination is one of the // configured floating IP. if (IntfHasFloatingIp(this, out->intf_, pkt->family)) { @@ -1509,7 +1664,8 @@ void PktFlowInfo::Add(const PktInfo *pkt, PktControlInfo *in, // In case the packet is for a reverse flow of a linklocal flow, // link to that flow (avoid creating a new reverse flow entry for the case) FlowEntryPtr rflow = flow->reverse_flow_entry(); - if (rflow && rflow->is_flags_set(FlowEntry::LinkLocalBindLocalSrcPort)) { + if (rflow && (rflow->is_flags_set(FlowEntry::LinkLocalBindLocalSrcPort) || + rflow->is_flags_set(FlowEntry::BgpRouterService))) { return; } diff --git a/src/vnsw/agent/pkt/pkt_flow_info.h b/src/vnsw/agent/pkt/pkt_flow_info.h index f75a2f7002a..8eed6ff3173 100644 --- a/src/vnsw/agent/pkt/pkt_flow_info.h +++ b/src/vnsw/agent/pkt/pkt_flow_info.h @@ -39,6 +39,7 @@ class PktFlowInfo { static const int kLinkLocalInvalidFd = -1; static const Ip4Address kDefaultIpv4; static const Ip6Address kDefaultIpv6; + static const int kBgpRouterServiceInvalidFd = -1; PktFlowInfo(Agent *a, boost::shared_ptr info, FlowTable *ftable) : l3_flow(info->l3_forwarding), family(info->family), pkt(info), @@ -52,7 +53,7 @@ class PktFlowInfo { ecmp(false), in_component_nh_idx(-1), out_component_nh_idx(-1), trap_rev_flow(false), fip_snat(false), fip_dnat(false), snat_fip(), short_flow_reason(0), peer_vrouter(), tunnel_type(TunnelType::INVALID), - flood_unknown_unicast(false) { + flood_unknown_unicast(false), bgp_router_service_flow(false) { } static bool ComputeDirection(const Interface *intf); @@ -63,6 +64,10 @@ class PktFlowInfo { PktControlInfo *out); void LinkLocalServiceTranslate(const PktInfo *pkt, PktControlInfo *in, PktControlInfo 
*out); + void BgpRouterServiceFromVm(const PktInfo *pkt, PktControlInfo *in, + PktControlInfo *out); + void BgpRouterServiceTranslate(const PktInfo *pkt, PktControlInfo *in, + PktControlInfo *out); void FloatingIpSNat(const PktInfo *pkt, PktControlInfo *in, PktControlInfo *out); void FloatingIpDNat(const PktInfo *pkt, PktControlInfo *in, @@ -96,13 +101,32 @@ class PktFlowInfo { void ApplyFlowLimits(const PktControlInfo *in, const PktControlInfo *out); void LinkLocalPortBind(const PktInfo *pkt, const PktControlInfo *in, const FlowEntry *flow); - + bool IngressRouteAllowNatLookup(const AgentRoute *in_rt, + const AgentRoute *out_rt, + uint32_t sport, + uint32_t dport, + const Interface *intf); + bool EgressRouteAllowNatLookup(const AgentRoute *in_rt, + const AgentRoute *out_rt, + uint32_t sport, + uint32_t dport, + const Interface *intf); public: void UpdateRoute(const AgentRoute **rt, const VrfEntry *vrf, const IpAddress &addr, const MacAddress &mac, FlowRouteRefMap &ref_map); uint8_t RouteToPrefixLen(const AgentRoute *route); void CalculatePort(const PktInfo *p, const Interface *intf); + bool RouteAllowNatLookupCommon(const AgentRoute *rt, + uint32_t sport, + uint32_t dport, + const Interface *intf); + bool IsBgpRouterServiceRoute(const AgentRoute *in_rt, + const AgentRoute *out_rt, + const Interface *intf, + uint32_t sport, + uint32_t dport); + bool l3_flow; Address::Family family; boost::shared_ptr pkt; @@ -161,6 +185,9 @@ class PktFlowInfo { // flow entry obtained from flow IPC, which requires recomputation. 
FlowEntry *flow_entry; bool flood_unknown_unicast; + + //BGP router service info + bool bgp_router_service_flow; }; #endif // __agent_pkt_flow_info_h_ diff --git a/src/vnsw/agent/vrouter/ksync/flowtable_ksync.cc b/src/vnsw/agent/vrouter/ksync/flowtable_ksync.cc index 2876f375098..96414b56ff1 100644 --- a/src/vnsw/agent/vrouter/ksync/flowtable_ksync.cc +++ b/src/vnsw/agent/vrouter/ksync/flowtable_ksync.cc @@ -299,7 +299,9 @@ int FlowTableKSyncEntry::Encode(sandesh_op::type op, char *buf, int buf_len) { flags |= VR_FLOW_FLAG_DPAT; } } - if (nat_flow->is_flags_set(FlowEntry::LinkLocalBindLocalSrcPort)) { + //TODO Separate flags for BgpRouterService?? + if (nat_flow->is_flags_set(FlowEntry::LinkLocalBindLocalSrcPort) || + nat_flow->is_flags_set(FlowEntry::BgpRouterService)) { flags |= VR_FLOW_FLAG_LINK_LOCAL; }