diff --git a/bgpd/ChangeLog b/bgpd/ChangeLog --- a/bgpd/ChangeLog +++ b/bgpd/ChangeLog @@ -1,3 +1,62 @@ +2005-11-08 Paul Jakma + + * bgp_route.c: (bgp_pcount_increment) Increment pcount if + appropriate. + (bgp_pcount_decrement) ditto, but decrement. + Don't decrement past 0, log a warning if that's attempted. + (bgp_rib_remove) use bgp_pcount_decrement. + (bgp_rib_withdraw) ditto, + (bgp_update_main) Use bgp_pcount_{decrement,increment} and + try to straighten out decrements, increments. I.e., decrement + immediately, if not already HISTORY, then increment once + re-accepted, for the implicit update path. + (bgp_clear_route_node) Use bgp_pcount_decrement. + Don't un-STALE a REMOVED info (just in case). + +2005-11-07 Paul Jakma + + * bgpd.h: move the clear_node_queue to be peer specific. + Add a new peer status flag, PEER_STATUS_CLEARING. + * bgp_table.h: (struct bgp_table) Add fields to record afi, + safi of the table. + (bgp_table_init) Take afi and safi to create table for. + * bgp_table.c: (bgp_table_init) record the afi and safi. + * bgp_nexthop.c: Update all calls to bgp_table_init. + * bgp_vty.c: ditto. + * bgpd.c: ditto. + * bgp_fsm.c: (bgp_timer_set) don't bring up a session which is + clearing. + * bgp_route.c: (general) Update all bgp_table_init calls. + (bgp_process_{rsclient,main}) clear_node is serialised + via PEER_STATUS_CLEARING and fsm now. + (struct bgp_clear_node_queue) can be removed. struct bgp_node + can be the queue item data directly, as struct peer can be + kept in the new wq global user data and afi/safi can be + retrieved via bgp_node -> bgp_table. + (bgp_clear_route_node) fix to get peer via wq->spec.data, + afi/safi via bgp_node->bgp_table. + (bgp_clear_node_queue_del) no more item data to delete, only + unlock the bgp_node. + (bgp_clear_node_complete) only need to unset CLEARING flag + and unlock struct peer. + (bgp_clear_node_queue_init) queue attaches to struct peer + now. record peer name as queue name. 
+ (bgp_clear_route_table) If queue transitions to active, + serialise clearing by setting PEER_STATUS_CLEARING rather + than plugging process queue, and lock peer while queue + active. + Update to pass only bgp_node as per-queue-item specific data. + +2005-11-07 Paul Jakma + + * bgp_route.c: (bgp_process_rsclient) convert to new workqueue + specs and shut up gcc, which complains about cast from void + via function parameters, for some dumb reason. Do the cast + inside the function instead. + (bgp_process_main,bgp_processq_del) ditto. + (bgp_clear_route_node) ditto. + (bgp_clear_node_queue_del) ditto. + 2005-11-03 Paul Jakma * bgp_damp.c: (bgp_reuse_timer) struct bgp can be retrieved via diff --git a/bgpd/bgp_damp.c b/bgpd/bgp_damp.c --- a/bgpd/bgp_damp.c +++ b/bgpd/bgp_damp.c @@ -155,8 +155,9 @@ bgp_reuse_timer (struct thread *t) { UNSET_FLAG (bdi->binfo->flags, BGP_INFO_HISTORY); bgp_aggregate_increment (bgp, &bdi->rn->p, bdi->binfo, - bdi->afi, bdi->safi); - bgp_process (bgp, bdi->rn, bdi->afi, bdi->safi); + bdi->rn->table->afi, + bdi->rn->table->safi); + bgp_process (bgp, bdi->rn); } if (bdi->penalty <= damp->reuse_limit / 2.0) @@ -203,8 +204,6 @@ bgp_damp_withdraw (struct bgp_info *binf bdi->start_time = t_now; bdi->suppress_time = 0; bdi->index = -1; - bdi->afi = afi; - bdi->safi = safi; binfo->damp_info = bdi; BGP_DAMP_LIST_ADD (damp, bdi); } @@ -255,8 +254,7 @@ bgp_damp_withdraw (struct bgp_info *binf } int -bgp_damp_update (struct bgp_info *binfo, struct bgp_node *rn, - afi_t afi, safi_t safi) +bgp_damp_update (struct bgp_info *binfo, struct bgp_node *rn) { time_t t_now; struct bgp_damp_info *bdi; @@ -615,11 +613,11 @@ bgp_damp_info_vty (struct vty *vty, stru } char * -bgp_damp_reuse_time_vty (struct vty *vty, struct bgp_info *binfo) +bgp_damp_reuse_time_vty (struct vty *vty, struct bgp_info *binfo, + char *timebuf, size_t len) { struct bgp_damp_info *bdi; time_t t_now, t_diff; - char timebuf[BGP_UPTIME_LEN]; int penalty; /* BGP dampening information. 
*/ @@ -629,11 +627,11 @@ bgp_damp_reuse_time_vty (struct vty *vty return immediately. */ if (! damp || ! bdi) return NULL; - + /* Calculate new penalty. */ t_now = time (NULL); t_diff = t_now - bdi->t_updated; penalty = bgp_damp_decay (t_diff, bdi->penalty); - return bgp_get_reuse_time (penalty, timebuf, BGP_UPTIME_LEN); + return bgp_get_reuse_time (penalty, timebuf, len); } diff --git a/bgpd/bgp_damp.h b/bgpd/bgp_damp.h --- a/bgpd/bgp_damp.h +++ b/bgpd/bgp_damp.h @@ -57,9 +57,6 @@ struct bgp_damp_info u_char lastrecord; #define BGP_RECORD_UPDATE 1U #define BGP_RECORD_WITHDRAW 2U - - afi_t afi; - safi_t safi; }; /* Specified parameter set configuration. */ @@ -134,7 +131,7 @@ extern int bgp_damp_enable (struct bgp * extern int bgp_damp_disable (struct bgp *, afi_t, safi_t); extern int bgp_damp_withdraw (struct bgp_info *, struct bgp_node *, afi_t, safi_t, int); -extern int bgp_damp_update (struct bgp_info *, struct bgp_node *, afi_t, safi_t); +extern int bgp_damp_update (struct bgp_info *, struct bgp_node *); extern int bgp_damp_scan (struct bgp_info *, afi_t, safi_t); extern void bgp_damp_info_free (struct bgp_damp_info *, int); extern void bgp_damp_info_clean (void); @@ -142,6 +139,7 @@ extern char * bgp_get_reuse_time (unsign extern int bgp_damp_decay (time_t, int); extern int bgp_config_write_damp (struct vty *); extern void bgp_damp_info_vty (struct vty *, struct bgp_info *); -extern char * bgp_damp_reuse_time_vty (struct vty *, struct bgp_info *); +extern char * bgp_damp_reuse_time_vty (struct vty *, struct bgp_info *, + char*, size_t); #endif /* _QUAGGA_BGP_DAMP_H */ diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -84,6 +84,7 @@ bgp_timer_set (struct peer *peer) inactive. All other timer must be turned off */ if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN) || CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW) + || CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING) || ! 
peer_active (peer)) { BGP_TIMER_OFF (peer->t_start); diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -496,7 +496,7 @@ bgp_scan (afi_t afi, safi_t safi) afi, SAFI_UNICAST); } } - bgp_process (bgp, rn, afi, SAFI_UNICAST); + bgp_process (bgp, rn); } /* Flash old cache. */ @@ -1292,17 +1292,17 @@ bgp_scan_init () bgp_scan_interval = BGP_SCAN_INTERVAL_DEFAULT; bgp_import_interval = BGP_IMPORT_INTERVAL_DEFAULT; - cache1_table[AFI_IP] = bgp_table_init (); - cache2_table[AFI_IP] = bgp_table_init (); + cache1_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST); + cache2_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST); bgp_nexthop_cache_table[AFI_IP] = cache1_table[AFI_IP]; - bgp_connected_table[AFI_IP] = bgp_table_init (); + bgp_connected_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST); #ifdef HAVE_IPV6 - cache1_table[AFI_IP6] = bgp_table_init (); - cache2_table[AFI_IP6] = bgp_table_init (); + cache1_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST); + cache2_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST); bgp_nexthop_cache_table[AFI_IP6] = cache1_table[AFI_IP6]; - bgp_connected_table[AFI_IP6] = bgp_table_init (); + bgp_connected_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST); #endif /* HAVE_IPV6 */ /* Make BGP scan thread. 
*/ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -71,7 +71,7 @@ bgp_afi_node_get (struct bgp_table *tabl prn = bgp_node_get (table, (struct prefix *) prd); if (prn->info == NULL) - prn->info = bgp_table_init (); + prn->info = bgp_table_init (afi, safi); else bgp_unlock_node (prn); table = prn->info; @@ -1191,44 +1191,46 @@ bgp_best_selection (struct bgp *bgp, str static int bgp_process_announce_selected (struct peer *peer, struct bgp_info *selected, - struct bgp_node *rn, struct attr *attr, afi_t afi, safi_t safi) - { + struct bgp_node *rn, struct attr *attr) +{ + afi_t afi = rn->table->afi; + safi_t safi = rn->table->safi; + struct prefix *p; p = &rn->p; - /* Announce route to Established peer. */ - if (peer->status != Established) + /* Announce route to Established peer. */ + if (peer->status != Established) return 0; - /* Address family configuration check. */ - if (! peer->afc_nego[afi][safi]) + /* Address family configuration check. */ + if (!peer->afc_nego[afi][safi]) return 0; - /* First update is deferred until ORF or ROUTE-REFRESH is received */ - if (CHECK_FLAG (peer->af_sflags[afi][safi], - PEER_STATUS_ORF_WAIT_REFRESH)) + /* First update is deferred until ORF or ROUTE-REFRESH is received */ + if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_WAIT_REFRESH)) return 0; switch (rn->table->type) { - case BGP_TABLE_MAIN: + case BGP_TABLE_MAIN: /* Announcement to peer->conf. If the route is filtered, withdraw it. */ - if (selected && bgp_announce_check (selected, peer, p, attr, afi, safi)) - bgp_adj_out_set (rn, peer, p, attr, afi, safi, selected); - else - bgp_adj_out_unset (rn, peer, p, afi, safi); - break; - case BGP_TABLE_RSCLIENT: - /* Announcement to peer->conf. If the route is filtered, - withdraw it. 
*/ - if (selected && bgp_announce_check_rsclient - (selected, peer, p, attr, afi, safi)) - bgp_adj_out_set (rn, peer, p, attr, afi, safi, selected); + if (selected && bgp_announce_check (selected, peer, p, attr, afi, safi)) + bgp_adj_out_set (rn, peer, p, attr, afi, safi, selected); else bgp_adj_out_unset (rn, peer, p, afi, safi); - break; + break; + case BGP_TABLE_RSCLIENT: + /* Announcement to peer->conf. If the route is filtered, + withdraw it. */ + if (selected && bgp_announce_check_rsclient + (selected, peer, p, attr, afi, safi)) + bgp_adj_out_set (rn, peer, p, attr, afi, safi, selected); + else + bgp_adj_out_unset (rn, peer, p, afi, safi); + break; } return 0; } @@ -1237,17 +1239,14 @@ struct bgp_process_queue { struct bgp *bgp; struct bgp_node *rn; - afi_t afi; - safi_t safi; }; static wq_item_status -bgp_process_rsclient (struct bgp_process_queue *pq) +bgp_process_rsclient (struct work_queue *wq, void *data) { + struct bgp_process_queue *pq = data; struct bgp *bgp = pq->bgp; struct bgp_node *rn = pq->rn; - afi_t afi = pq->afi; - safi_t safi = pq->safi; struct bgp_info *new_select; struct bgp_info *old_select; struct bgp_info_pair old_and_new; @@ -1255,13 +1254,6 @@ bgp_process_rsclient (struct bgp_process struct listnode *node, *nnode; struct peer *rsclient = rn->table->owner; - /* we shouldn't run if the clear_route_node queue is still running - * or scheduled to run, or we can race with session coming up - * and adding routes back before we've cleared them - */ - if (bm->clear_node_queue && bm->clear_node_queue->thread) - return WQ_QUEUE_BLOCKED; - /* Best path selection. 
*/ bgp_best_selection (bgp, rn, &old_and_new); new_select = old_and_new.new; @@ -1284,8 +1276,7 @@ bgp_process_rsclient (struct bgp_process UNSET_FLAG (new_select->flags, BGP_INFO_ATTR_CHANGED); } - bgp_process_announce_selected (rsclient, new_select, rn, &attr, - afi, safi); + bgp_process_announce_selected (rsclient, new_select, rn, &attr); } } else @@ -1297,8 +1288,7 @@ bgp_process_rsclient (struct bgp_process SET_FLAG (new_select->flags, BGP_INFO_SELECTED); UNSET_FLAG (new_select->flags, BGP_INFO_ATTR_CHANGED); } - bgp_process_announce_selected (rsclient, new_select, rn, - &attr, afi, safi); + bgp_process_announce_selected (rsclient, new_select, rn, &attr); } if (old_select && CHECK_FLAG (old_select->flags, BGP_INFO_REMOVED)) @@ -1309,12 +1299,11 @@ bgp_process_rsclient (struct bgp_process } static wq_item_status -bgp_process_main (struct bgp_process_queue *pq) +bgp_process_main (struct work_queue *wq, void *data) { + struct bgp_process_queue *pq = data; struct bgp *bgp = pq->bgp; struct bgp_node *rn = pq->rn; - afi_t afi = pq->afi; - safi_t safi = pq->safi; struct prefix *p = &rn->p; struct bgp_info *new_select; struct bgp_info *old_select; @@ -1323,13 +1312,6 @@ bgp_process_main (struct bgp_process_que struct peer *peer; struct attr attr; - /* we shouldn't run if the clear_route_node queue is still running - * or scheduled to run, or we can race with session coming up - * and adding routes back before we've cleared them - */ - if (bm->clear_node_queue && bm->clear_node_queue->thread) - return WQ_QUEUE_BLOCKED; - /* Best path selection. */ bgp_best_selection (bgp, rn, &old_and_new); old_select = old_and_new.old; @@ -1359,12 +1341,10 @@ bgp_process_main (struct bgp_process_que /* Check each BGP peer. */ for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - { - bgp_process_announce_selected (peer, new_select, rn, &attr, afi, safi); - } + bgp_process_announce_selected (peer, new_select, rn, &attr); /* FIB update. */ - if (safi == SAFI_UNICAST && ! 
bgp->name && + if (rn->table->safi == SAFI_UNICAST && ! bgp->name && ! bgp_option_check (BGP_OPT_NO_FIB)) { if (new_select @@ -1390,8 +1370,10 @@ bgp_process_main (struct bgp_process_que } static void -bgp_processq_del (struct bgp_process_queue *pq) +bgp_processq_del (struct work_queue *wq, void *data) { + struct bgp_process_queue *pq = data; + bgp_unlock_node (pq->rn); XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } @@ -1424,7 +1406,7 @@ bgp_process_queue_init (void) } void -bgp_process (struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi) +bgp_process (struct bgp *bgp, struct bgp_node *rn) { struct bgp_process_queue *pqnode; @@ -1443,9 +1425,7 @@ bgp_process (struct bgp *bgp, struct bgp pqnode->rn = bgp_lock_node (rn); /* unlocked by bgp_processq_del */ pqnode->bgp = bgp; - pqnode->afi = afi; - pqnode->safi = safi; - + switch (rn->table->type) { case BGP_TABLE_MAIN: @@ -1552,57 +1532,81 @@ bgp_maximum_prefix_overflow (struct peer return 0; } -/* Unconditionally remove the route from the RIB, without taking - * damping into consideration (eg, because the session went down) - */ static void -bgp_rib_remove (struct bgp_node *rn, struct bgp_info *ri, struct peer *peer, - afi_t afi, safi_t safi) +bgp_pcount_increment (struct bgp_node *rn, struct bgp_info *ri) +{ + afi_t afi = rn->table->afi; + safi_t safi = rn->table->safi; + + if (!BGP_INFO_HOLDDOWN (ri) && rn->table->type == BGP_TABLE_MAIN) + { + ri->peer->pcount[afi][safi]++; + bgp_aggregate_increment (ri->peer->bgp, &rn->p, ri, afi, safi); + } +} + +static void +bgp_pcount_decrement (struct bgp_node *rn, struct bgp_info *ri) { - if (!CHECK_FLAG (ri->flags, BGP_INFO_HISTORY) - && rn->table->type == BGP_TABLE_MAIN) + afi_t afi = rn->table->afi; + safi_t safi = rn->table->safi; + + /* Ignore 'pcount' for RS-client tables */ + if (rn->table->type == BGP_TABLE_MAIN) { - /* Ignore 'pcount' for RS-client tables */ - if ( rn->table->type == BGP_TABLE_MAIN) + /* slight hack, but more robust against errors. 
*/ + if (ri->peer->pcount[afi][safi]) + ri->peer->pcount[afi][safi]--; + else { - peer->pcount[afi][safi]--; - bgp_aggregate_decrement (peer->bgp, &rn->p, ri, afi, safi); + zlog_warn ("%s: Asked to decrement 0 prefix count for peer %s", + __func__, ri->peer->host); + zlog_backtrace (LOG_WARNING); + zlog_warn ("%s: Please report to Quagga bugzilla", __func__); } + + if (!BGP_INFO_HOLDDOWN (ri)) + bgp_aggregate_decrement (ri->peer->bgp, &rn->p, ri, afi, safi); } - bgp_process (peer->bgp, rn, afi, safi); - bgp_info_delete (rn, ri); } +/* Unconditionally remove the route from the RIB, without taking + * damping into consideration (eg, because the session went down) + */ static void -bgp_rib_withdraw (struct bgp_node *rn, struct bgp_info *ri, struct peer *peer, - afi_t afi, safi_t safi) +bgp_rib_remove (struct bgp_node *rn, struct bgp_info *ri) { + bgp_pcount_decrement (rn, ri); + + if (!CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) + bgp_info_delete (rn, ri); /* keep historical info */ + + bgp_process (ri->peer->bgp, rn); +} + +static void +bgp_rib_withdraw (struct bgp_node *rn, struct bgp_info *ri) +{ + afi_t afi = rn->table->afi; + safi_t safi = rn->table->safi; + int status = BGP_DAMP_NONE; - if (!CHECK_FLAG (ri->flags, BGP_INFO_HISTORY) - && rn->table->type == BGP_TABLE_MAIN) - { - /* Ignore 'pcount' for RS-client tables */ - if ( rn->table->type == BGP_TABLE_MAIN) - { - peer->pcount[afi][safi]--; - bgp_aggregate_decrement (peer->bgp, &rn->p, ri, afi, safi); - } - } + bgp_pcount_decrement (rn, ri); /* apply dampening, if result is suppressed, we'll be retaining * the bgp_info in the RIB for historical reference. 
*/ - if (CHECK_FLAG (peer->bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING) - && peer_sort (peer) == BGP_PEER_EBGP) + if (CHECK_FLAG (ri->peer->bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING) + && peer_sort (ri->peer) == BGP_PEER_EBGP) if ( (status = bgp_damp_withdraw (ri, rn, afi, safi, 0)) == BGP_DAMP_SUPPRESSED) return; - - bgp_process (peer->bgp, rn, afi, safi); - + if (status != BGP_DAMP_USED) bgp_info_delete (rn, ri); + + bgp_process (ri->peer->bgp, rn); } static void @@ -1730,7 +1734,7 @@ bgp_update_rsclient (struct peer *rsclie SET_FLAG (ri->flags, BGP_INFO_VALID); /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_unlock_node (rn); return; @@ -1766,7 +1770,7 @@ bgp_update_rsclient (struct peer *rsclie bgp_unlock_node (rn); /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); return; @@ -1781,7 +1785,7 @@ bgp_update_rsclient (struct peer *rsclie p->prefixlen, rsclient->host, reason); if (ri) - bgp_rib_remove (rn, ri, peer, afi, safi); + bgp_rib_remove (rn, ri); bgp_unlock_node (rn); @@ -1809,7 +1813,7 @@ bgp_withdraw_rsclient (struct peer *rscl /* Withdraw specified route from routing table. */ if (ri && ! 
CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) - bgp_rib_withdraw (rn, ri, peer, afi, safi); + bgp_rib_withdraw (rn, ri); else if (BGP_DEBUG (update, UPDATE_IN)) zlog (peer->log, LOG_DEBUG, "%s Can't find the route %s/%d", peer->host, @@ -1950,13 +1954,9 @@ bgp_update_main (struct peer *peer, stru inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen); - peer->pcount[afi][safi]++; - ret = bgp_damp_update (ri, rn, afi, safi); + ret = bgp_damp_update (ri, rn); if (ret != BGP_DAMP_SUPPRESSED) - { - bgp_aggregate_increment (bgp, p, ri, afi, safi); - bgp_process (bgp, rn, afi, safi); - } + bgp_pcount_increment (rn, ri); } else { @@ -1971,7 +1971,7 @@ bgp_update_main (struct peer *peer, stru if (CHECK_FLAG (ri->flags, BGP_INFO_STALE)) { UNSET_FLAG (ri->flags, BGP_INFO_STALE); - peer->pcount[afi][safi]++; + bgp_pcount_increment (rn, ri); } } @@ -1989,14 +1989,16 @@ bgp_update_main (struct peer *peer, stru /* graceful restart STALE flag unset. */ if (CHECK_FLAG (ri->flags, BGP_INFO_STALE)) - { - UNSET_FLAG (ri->flags, BGP_INFO_STALE); - peer->pcount[afi][safi]++; - } + UNSET_FLAG (ri->flags, BGP_INFO_STALE); /* The attribute is changed. */ SET_FLAG (ri->flags, BGP_INFO_ATTR_CHANGED); - + + /* implicit withdraw, decrement aggregate and pcount here. + * only if update is accepted, they'll increment below. + */ + bgp_pcount_decrement (rn, ri); + /* Update bgp route dampening information. */ if (CHECK_FLAG (bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING) && peer_sort (peer) == BGP_PEER_EBGP) @@ -2005,12 +2007,8 @@ bgp_update_main (struct peer *peer, stru information. */ if (! CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) bgp_damp_withdraw (ri, rn, afi, safi, 1); - else - peer->pcount[afi][safi]++; } - bgp_aggregate_decrement (bgp, p, ri, afi, safi); - /* Update to new attribute. */ bgp_attr_unintern (ri->attr); ri->attr = attr_new; @@ -2024,7 +2022,7 @@ bgp_update_main (struct peer *peer, stru && peer_sort (peer) == BGP_PEER_EBGP) { /* Now we do normal update dampening. 
*/ - ret = bgp_damp_update (ri, rn, afi, safi); + ret = bgp_damp_update (ri, rn); if (ret == BGP_DAMP_SUPPRESSED) { bgp_unlock_node (rn); @@ -2048,9 +2046,9 @@ bgp_update_main (struct peer *peer, stru SET_FLAG (ri->flags, BGP_INFO_VALID); /* Process change. */ - bgp_aggregate_increment (bgp, p, ri, afi, safi); + bgp_pcount_increment (rn, ri); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_unlock_node (rn); return 0; } @@ -2064,9 +2062,6 @@ bgp_update_main (struct peer *peer, stru p->prefixlen); } - /* Increment prefix counter */ - peer->pcount[afi][safi]++; - /* Make new BGP info. */ new = bgp_info_new (); new->type = type; @@ -2077,7 +2072,7 @@ bgp_update_main (struct peer *peer, stru /* Update MPLS tag. */ if (safi == SAFI_MPLS_VPN) - memcpy (new->tag, tag, 3); + memcpy (new->tag, tag, sizeof (new->tag)); /* Nexthop reachability check. */ if ((afi == AFI_IP || afi == AFI_IP6) @@ -2094,8 +2089,8 @@ bgp_update_main (struct peer *peer, stru else SET_FLAG (new->flags, BGP_INFO_VALID); - /* Aggregate address increment. */ - bgp_aggregate_increment (bgp, p, new, afi, safi); + /* Increment prefix */ + bgp_pcount_increment (rn, new); /* Register new BGP information. */ bgp_info_add (rn, new); @@ -2109,7 +2104,7 @@ bgp_update_main (struct peer *peer, stru return -1; /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); return 0; @@ -2124,7 +2119,7 @@ bgp_update_main (struct peer *peer, stru p->prefixlen, reason); if (ri) - bgp_rib_remove (rn, ri, peer, afi, safi); + bgp_rib_remove (rn, ri); bgp_unlock_node (rn); @@ -2201,7 +2196,7 @@ bgp_withdraw (struct peer *peer, struct /* Withdraw specified route from routing table. */ if (ri && ! 
CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) - bgp_rib_withdraw (rn, ri, peer, afi, safi); + bgp_rib_withdraw (rn, ri); else if (BGP_DEBUG (update, UPDATE_IN)) zlog (peer->log, LOG_DEBUG, "%s Can't find the route %s/%d", peer->host, @@ -2441,132 +2436,138 @@ bgp_soft_reconfig_in (struct peer *peer, bgp_soft_reconfig_table (peer, afi, safi, table); } -struct bgp_clear_node_queue -{ - struct bgp_node *rn; - struct peer *peer; - afi_t afi; - safi_t safi; -}; - static wq_item_status -bgp_clear_route_node (struct bgp_clear_node_queue *cq) +bgp_clear_route_node (struct work_queue *wq, void *data) { + struct bgp_node *rn = data; + struct peer *peer = wq->spec.data; struct bgp_adj_in *ain; struct bgp_adj_out *aout; struct bgp_info *ri; + afi_t afi = rn->table->afi; + safi_t safi = rn->table->safi; - assert (cq->rn && cq->peer); + assert (rn && peer); - for (ri = cq->rn->info; ri; ri = ri->next) - if (ri->peer == cq->peer) + for (ri = rn->info; ri; ri = ri->next) + if (ri->peer == peer) { /* graceful restart STALE flag set. */ - if (CHECK_FLAG (cq->peer->sflags, PEER_STATUS_NSF_WAIT) - && cq->peer->nsf[cq->afi][cq->safi] + if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT) + && peer->nsf[afi][safi] && ! CHECK_FLAG (ri->flags, BGP_INFO_STALE) && ! CHECK_FLAG (ri->flags, BGP_INFO_HISTORY) - && ! CHECK_FLAG (ri->flags, BGP_INFO_DAMPED)) + && ! CHECK_FLAG (ri->flags, BGP_INFO_DAMPED) + && ! 
CHECK_FLAG (ri->flags, BGP_INFO_REMOVED)) { + bgp_pcount_decrement (rn, ri); SET_FLAG (ri->flags, BGP_INFO_STALE); - cq->peer->pcount[cq->afi][cq->safi]--; } else - bgp_rib_remove (cq->rn, ri, cq->peer, cq->afi, cq->safi); + bgp_rib_remove (rn, ri); break; } - for (ain = cq->rn->adj_in; ain; ain = ain->next) - if (ain->peer == cq->peer) + for (ain = rn->adj_in; ain; ain = ain->next) + if (ain->peer == peer) { - bgp_adj_in_remove (cq->rn, ain); - bgp_unlock_node (cq->rn); + bgp_adj_in_remove (rn, ain); + bgp_unlock_node (rn); break; } - for (aout = cq->rn->adj_out; aout; aout = aout->next) - if (aout->peer == cq->peer) + for (aout = rn->adj_out; aout; aout = aout->next) + if (aout->peer == peer) { - bgp_adj_out_remove (cq->rn, aout, cq->peer, cq->afi, cq->safi); - bgp_unlock_node (cq->rn); + bgp_adj_out_remove (rn, aout, peer, afi, safi); + bgp_unlock_node (rn); break; - } + } return WQ_SUCCESS; } static void -bgp_clear_node_queue_del (struct bgp_clear_node_queue *cq) +bgp_clear_node_queue_del (struct work_queue *wq, void *data) { - bgp_unlock_node (cq->rn); - peer_unlock (cq->peer); /* bgp_clear_node_queue_del */ - XFREE (MTYPE_BGP_CLEAR_NODE_QUEUE, cq); + struct bgp_node *rn = data; + + bgp_unlock_node (rn); } static void bgp_clear_node_complete (struct work_queue *wq) { - /* unplug the 2 processing queues */ - if (bm->process_main_queue) - work_queue_unplug (bm->process_main_queue); - if (bm->process_rsclient_queue) - work_queue_unplug (bm->process_rsclient_queue); + struct peer *peer = wq->spec.data; + + UNSET_FLAG (peer->sflags, PEER_STATUS_CLEARING); + BGP_EVENT_ADD (peer, BGP_Start); + + peer_unlock (peer); /* bgp_clear_node_complete */ } static void -bgp_clear_node_queue_init (void) +bgp_clear_node_queue_init (struct peer *peer) { - if ( (bm->clear_node_queue - = work_queue_new (bm->master, "clear_route_node")) == NULL) +#define CLEAR_QUEUE_NAME_LEN 26 /* "clear 2001:123:123:123::1" */ + char wname[CLEAR_QUEUE_NAME_LEN]; + + snprintf (wname, 
CLEAR_QUEUE_NAME_LEN, "clear %s", peer->host); +#undef CLEAR_QUEUE_NAME_LEN + + if ( (peer->clear_node_queue = work_queue_new (bm->master, wname)) == NULL) { zlog_err ("%s: Failed to allocate work queue", __func__); exit (1); } - bm->clear_node_queue->spec.hold = 10; - bm->clear_node_queue->spec.delay = 0; /* no gathering to be gained */ - bm->clear_node_queue->spec.workfunc = &bgp_clear_route_node; - bm->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del; - bm->clear_node_queue->spec.completion_func = &bgp_clear_node_complete; - bm->clear_node_queue->spec.max_retries = 0; + peer->clear_node_queue->spec.hold = 10; + peer->clear_node_queue->spec.delay = 0; /* no gathering to be gained */ + peer->clear_node_queue->spec.workfunc = &bgp_clear_route_node; + peer->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del; + peer->clear_node_queue->spec.completion_func = &bgp_clear_node_complete; + peer->clear_node_queue->spec.max_retries = 0; + + /* we only 'lock' this peer reference when the queue is actually active */ + peer->clear_node_queue->spec.data = peer; } static void bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, struct bgp_table *table, struct peer *rsclient) { - struct bgp_clear_node_queue *cqnode; struct bgp_node *rn; if (! table) table = (rsclient) ? rsclient->rib[afi][safi] : peer->bgp->rib[afi][safi]; - + /* If still no table => afi/safi isn't configured at all or smth. */ if (! table) return; - if (bm->clear_node_queue == NULL) - bgp_clear_node_queue_init (); + if (peer->clear_node_queue == NULL) + bgp_clear_node_queue_init (peer); - /* plug the two bgp_process queues to avoid any chance of racing - * with a session coming back up and adding routes before we've - * cleared them all. We'll unplug them with completion callback. 
+ /* bgp_fsm.c will not bring CLEARING sessions out of Idle; this + * protects against peers which flap faster than we can clear, + * which could lead to: + * + * a) race with routes from the new session being installed before + * clear_route_node visits the node (to delete the route of that + * peer) + * b) resource exhaustion, clear_route_node likely leads to an entry + * on the process_main queue. Fast-flapping could cause that queue + * to grow and grow. */ - if (bm->process_main_queue) - work_queue_plug (bm->process_main_queue); - if (bm->process_rsclient_queue) - work_queue_plug (bm->process_rsclient_queue); + if (!CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING)) + { + SET_FLAG (peer->sflags, PEER_STATUS_CLEARING); + peer_lock (peer); /* bgp_clear_node_complete */ + } for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) { if (rn->info == NULL) continue; - if ( (cqnode = XCALLOC (MTYPE_BGP_CLEAR_NODE_QUEUE, - sizeof (struct bgp_clear_node_queue))) == NULL) - continue; - - cqnode->rn = bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */ - cqnode->afi = afi; - cqnode->safi = safi; - cqnode->peer = peer_lock (peer); /* bgp_clear_node_queue_del */ - work_queue_add (bm->clear_node_queue, cqnode); + bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */ + work_queue_add (peer->clear_node_queue, rn); } return; } @@ -2639,7 +2640,7 @@ bgp_clear_stale_route (struct peer *peer if (ri->peer == peer) { if (CHECK_FLAG (ri->flags, BGP_INFO_STALE)) - bgp_rib_remove (rn, ri, peer, afi, safi); + bgp_rib_remove (rn, ri); break; } } @@ -2886,7 +2887,7 @@ bgp_static_withdraw_rsclient (struct bgp if (ri) { UNSET_FLAG (ri->flags, BGP_INFO_VALID); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_info_delete (rn, ri); } @@ -3005,7 +3006,7 @@ bgp_static_update_rsclient (struct peer ri->uptime = time (NULL); /* Process change. 
*/ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_unlock_node (rn); aspath_unintern (attr.aspath); return; @@ -3028,7 +3029,7 @@ bgp_static_update_rsclient (struct peer bgp_unlock_node (rn); /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); /* Unintern original. */ aspath_unintern (attr.aspath); @@ -3112,7 +3113,7 @@ bgp_static_update_main (struct bgp *bgp, /* Process change. */ bgp_aggregate_increment (bgp, p, ri, afi, safi); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_unlock_node (rn); aspath_unintern (attr.aspath); return; @@ -3138,7 +3139,7 @@ bgp_static_update_main (struct bgp *bgp, bgp_unlock_node (rn); /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); /* Unintern original. */ aspath_unintern (attr.aspath); @@ -3188,7 +3189,7 @@ bgp_static_update_vpnv4 (struct bgp *bgp bgp_unlock_node (rn); /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); } void @@ -3212,7 +3213,7 @@ bgp_static_withdraw (struct bgp *bgp, st { bgp_aggregate_decrement (bgp, p, ri, afi, safi); UNSET_FLAG (ri->flags, BGP_INFO_VALID); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_info_delete (rn, ri); } @@ -3261,7 +3262,7 @@ bgp_static_withdraw_vpnv4 (struct bgp *b { bgp_aggregate_decrement (bgp, p, ri, afi, safi); UNSET_FLAG (ri->flags, BGP_INFO_VALID); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_info_delete (rn, ri); } @@ -3498,7 +3499,7 @@ bgp_static_set_vpnv4 (struct vty *vty, c prn = bgp_node_get (bgp->route[AFI_IP][SAFI_MPLS_VPN], (struct prefix *)&prd); if (prn->info == NULL) - prn->info = bgp_table_init (); + prn->info = bgp_table_init (AFI_IP, SAFI_MPLS_VPN); else bgp_unlock_node (prn); table = prn->info; @@ -3567,7 +3568,7 @@ bgp_static_unset_vpnv4 (struct vty *vty, prn = bgp_node_get (bgp->route[AFI_IP][SAFI_MPLS_VPN], (struct prefix *)&prd); if (prn->info == NULL) - prn->info = bgp_table_init (); + prn->info = 
bgp_table_init (AFI_IP, SAFI_MPLS_VPN); else bgp_unlock_node (prn); table = prn->info; @@ -4079,7 +4080,7 @@ bgp_aggregate_route (struct bgp *bgp, st } } if (match) - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); } bgp_unlock_node (top); @@ -4132,7 +4133,7 @@ bgp_aggregate_route (struct bgp *bgp, st bgp_info_add (rn, new); bgp_unlock_node (rn); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); } else { @@ -4284,7 +4285,7 @@ bgp_aggregate_add (struct bgp *bgp, stru /* If this node is suppressed, process the change. */ if (match) - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); } bgp_unlock_node (top); @@ -4305,7 +4306,7 @@ bgp_aggregate_add (struct bgp *bgp, stru bgp_unlock_node (rn); /* Process change. */ - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); } } @@ -4356,7 +4357,7 @@ bgp_aggregate_delete (struct bgp *bgp, s /* If this node is suppressed, process the change. */ if (match) - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); } bgp_unlock_node (top); @@ -4373,7 +4374,7 @@ bgp_aggregate_delete (struct bgp *bgp, s if (ri) { UNSET_FLAG (ri->flags, BGP_INFO_VALID); - bgp_process (bgp, rn, afi, safi); + bgp_process (bgp, rn); bgp_info_delete (rn, ri); } @@ -4908,7 +4909,7 @@ bgp_redistribute_add (struct prefix *p, /* Process change. 
*/ bgp_aggregate_increment (bgp, p, bi, afi, SAFI_UNICAST); - bgp_process (bgp, bn, afi, SAFI_UNICAST); + bgp_process (bgp, bn); bgp_unlock_node (bn); aspath_unintern (attr.aspath); return; @@ -4926,7 +4927,7 @@ bgp_redistribute_add (struct prefix *p, bgp_aggregate_increment (bgp, p, new, afi, SAFI_UNICAST); bgp_info_add (bn, new); bgp_unlock_node (bn); - bgp_process (bgp, bn, afi, SAFI_UNICAST); + bgp_process (bgp, bn); } } @@ -4960,7 +4961,7 @@ bgp_redistribute_delete (struct prefix * { bgp_aggregate_decrement (bgp, p, ri, afi, SAFI_UNICAST); UNSET_FLAG (ri->flags, BGP_INFO_VALID); - bgp_process (bgp, rn, afi, SAFI_UNICAST); + bgp_process (bgp, rn); bgp_info_delete (rn, ri); } bgp_unlock_node (rn); @@ -4989,7 +4990,7 @@ bgp_redistribute_withdraw (struct bgp *b { bgp_aggregate_decrement (bgp, &rn->p, ri, afi, SAFI_UNICAST); UNSET_FLAG (ri->flags, BGP_INFO_VALID); - bgp_process (bgp, rn, afi, SAFI_UNICAST); + bgp_process (bgp, rn); bgp_info_delete (rn, ri); } } @@ -5258,6 +5259,7 @@ damp_route_vty_out (struct vty *vty, str { struct attr *attr; int len; + char timebuf[BGP_UPTIME_LEN]; /* short status lead text */ route_vty_short_status_out (vty, binfo); @@ -5275,7 +5277,8 @@ damp_route_vty_out (struct vty *vty, str else vty_out (vty, "%*s", len, " "); - vty_out (vty, "%s ", bgp_damp_reuse_time_vty (vty, binfo)); + vty_out (vty, "%s ", + bgp_damp_reuse_time_vty (vty, binfo, timebuf, BGP_UPTIME_LEN)); /* Print attribute */ attr = binfo->attr; @@ -5331,12 +5334,12 @@ flap_route_vty_out (struct vty *vty, str else vty_out (vty, "%*s ", len, " "); - vty_out (vty, "%s ", peer_uptime (bdi->start_time, - timebuf, BGP_UPTIME_LEN)); + vty_out (vty, "%s ", peer_uptime (bdi->start_time, timebuf, BGP_UPTIME_LEN)); if (CHECK_FLAG (binfo->flags, BGP_INFO_DAMPED) && ! 
CHECK_FLAG (binfo->flags, BGP_INFO_HISTORY)) - vty_out (vty, "%s ", bgp_damp_reuse_time_vty (vty, binfo)); + vty_out (vty, "%s ", + bgp_damp_reuse_time_vty (vty, binfo, timebuf, BGP_UPTIME_LEN)); else vty_out (vty, "%*s ", 8, " "); @@ -10382,7 +10385,7 @@ void bgp_route_init () { /* Init BGP distance table. */ - bgp_distance_table = bgp_table_init (); + bgp_distance_table = bgp_table_init (AFI_IP, SAFI_UNICAST); /* IPv4 BGP commands. */ install_element (BGP_NODE, &bgp_network_cmd); diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -179,7 +179,7 @@ extern int bgp_withdraw (struct peer *, afi_t, safi_t, int, int, struct prefix_rd *, u_char *); /* for bgp_nexthop and bgp_damp */ -extern void bgp_process (struct bgp *, struct bgp_node *, afi_t, safi_t); +extern void bgp_process (struct bgp *, struct bgp_node *); extern int bgp_config_write_network (struct vty *, struct bgp *, afi_t, safi_t, int *); extern int bgp_config_write_distance (struct vty *, struct bgp *); diff --git a/bgpd/bgp_table.c b/bgpd/bgp_table.c --- a/bgpd/bgp_table.c +++ b/bgpd/bgp_table.c @@ -32,7 +32,7 @@ void bgp_node_delete (struct bgp_node *) void bgp_table_free (struct bgp_table *); struct bgp_table * -bgp_table_init (void) +bgp_table_init (afi_t afi, safi_t safi) { struct bgp_table *rt; @@ -40,7 +40,9 @@ bgp_table_init (void) memset (rt, 0, sizeof (struct bgp_table)); rt->type = BGP_TABLE_MAIN; - + rt->afi = afi; + rt->safi = safi; + return rt; } diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -30,7 +30,11 @@ typedef enum struct bgp_table { bgp_table_t type; - + + /* afi/safi of this table */ + afi_t afi; + safi_t safi; + /* The owner of this 'bgp_table' structure. 
*/ void *owner; @@ -63,7 +67,7 @@ struct bgp_node #define BGP_NODE_PROCESS_SCHEDULED (1 << 0) }; -extern struct bgp_table *bgp_table_init (void); +extern struct bgp_table *bgp_table_init (afi_t, safi_t); extern void bgp_table_finish (struct bgp_table *); extern void bgp_unlock_node (struct bgp_node *node); extern void bgp_node_delete (struct bgp_node *node); diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -2053,7 +2053,7 @@ peer_rsclient_set_vty (struct vty *vty, if (ret < 0) return bgp_vty_return (vty, ret); - peer->rib[afi][safi] = bgp_table_init (); + peer->rib[afi][safi] = bgp_table_init (afi, safi); peer->rib[afi][safi]->type = BGP_TABLE_RSCLIENT; peer->rib[afi][safi]->owner = peer; @@ -6535,6 +6535,8 @@ bgp_show_summary (struct vty *vty, struc { if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN)) vty_out (vty, " Idle (Admin)"); + else if (CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING)) + vty_out (vty, " Idle (Clrng)"); else if (CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW)) vty_out (vty, " Idle (PfxCt)"); else diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1868,9 +1868,9 @@ bgp_create (as_t *as, const char *name) for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) { - bgp->route[afi][safi] = bgp_table_init (); - bgp->aggregate[afi][safi] = bgp_table_init (); - bgp->rib[afi][safi] = bgp_table_init (); + bgp->route[afi][safi] = bgp_table_init (afi, safi); + bgp->aggregate[afi][safi] = bgp_table_init (afi, safi); + bgp->rib[afi][safi] = bgp_table_init (afi, safi); } bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -40,7 +40,6 @@ struct bgp_master /* work queues */ struct work_queue *process_main_queue; struct work_queue *process_rsclient_queue; - struct work_queue *clear_node_queue; /* BGP port number. 
*/ u_int16_t port; @@ -387,6 +386,7 @@ struct peer #define PEER_STATUS_GROUP (1 << 4) /* peer-group conf */ #define PEER_STATUS_NSF_MODE (1 << 5) /* NSF aware peer */ #define PEER_STATUS_NSF_WAIT (1 << 6) /* wait comeback peer */ +#define PEER_STATUS_CLEARING (1 << 7) /* peers table being cleared */ /* Peer status af flags (reset in bgp_stop) */ u_int16_t af_sflags[AFI_MAX][SAFI_MAX]; @@ -398,7 +398,6 @@ struct peer #define PEER_STATUS_EOR_SEND (1 << 5) /* end-of-rib send to peer */ #define PEER_STATUS_EOR_RECEIVED (1 << 6) /* end-of-rib received from peer */ - /* Default attribute value for the peer. */ u_int32_t config; #define PEER_CONFIG_WEIGHT (1 << 0) /* Default weight. */ @@ -433,7 +432,10 @@ struct peer struct thread *t_pmax_restart; struct thread *t_gr_restart; struct thread *t_gr_stale; - + + /* workqueues */ + struct work_queue *clear_node_queue; + /* Statistics field */ u_int32_t open_in; /* Open message input count */ u_int32_t open_out; /* Open message output count */ diff --git a/lib/ChangeLog b/lib/ChangeLog --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,136 @@ +2005-11-08 Paul Jakma + + * memtypes.c: Remove MTYPE_BGP_CLEAR_NODE_QUEUE. + +2005-11-07 Paul Jakma + + * (general) pass struct work-queue to callback functions. + * workqueue.h: (struct work_queue) move the state flag + variables to end. + Add an opaque pointer to spec, for user-data global to the + queue. + Pass reference to work_queue to all callbacks. + * workqueue.c: (work_queue_item_remove) pass ref to workqueue + to user callbacks. + (work_queue_run) ditto. + +2005-11-05 Paul Jakma + + * (general) Add state to detect queue floods. There's no sense + trying to be sparing of CPU resources, if the queue is + flooding and using ever more memory resources. we should just + get on with clearing the queue. + The sense of delay and hold were wrong way around, fix. + * workqueue.h: (struct work_queue) Add status bitfield. Add + 'flood' integer to workqueue spec. 
Add runs_since_clear + counter to workqueue. + * workqueue.c: (work_queue_new) set defaults for delay, hold + and flood. + (work_queue_add) initial schedule should use delay, not hold. + (show_work_queues) Print flood field, conserve whitespace. + (work_queue_unplug) use delay, not hold. + (work_queue_run) consecutive runs should be separated by hold + time, not delay. + Keep track of number of consecutive runs, go into 'overdrive' + if queue is being flooded, we can't avoid making heavy use of + resources, better to use CPU than ever more RAM. + +2005-11-05 Paul Jakma + + * memory.c: (general) Add caching of specified types. + Factor of 2 improvement in malloc times for heavily reused + objects. Minor degradation in worst-case usage objects, about + 10% overhead, however those kinds of objects should NOT be + marked cacheable anyway. + (struct mstat) Add extra fields for caching. + (alloc_inc) Add tide tracking. + (alloc_dec) ditto. + (zmemory_cache_free) new function, free all cached objects + for a type. + (zmemory_cache_invalidate) new function, invalidate cache, + typically because different size requests have been made + which means caching is no longer safe. + (zmemory_cache_lookup) Quick by-type cache to short-circuit + malloc/free for objects we can easily cache. + (zmemory_cache_add) new function, potentially cache objects + which are freed for reuse. + (zmalloc) Try satisfy request from cache. + (zcalloc) ditto + (zrealloc) invalidate cache - size changes. + (zfree) Try cache object rather than freeing, via zmemory_cache_add. + (zstrdup) Invalidate cache + (show_memory_vty_header) Add support for cache headers. + (show_memory_vty) Add support for the new stats and cache + fields. + (memory_init) Initialise the mstat array's cacheable field + from memory_lists. + * memory.h: Add cacheable ENUM. + Add cacheable field to struct memory_list. + * memtypes.c: Add cacheable specifiers to all types, based on + observation of which types get good hit rates.
+ +2005-11-05 Paul Jakma + + * memory.c: (general) Add optional extra usage-stats gathering. + (struct mstat) Compile time optional fields for extra statistics. + (alloc_inc) move to top, mark for inlining. Add mstats support. + (alloc_dec) ditto. + (z{m,c,re}alloc,zfree,zstrdup) Add stats support. + (ifdef MEMORY_LOG) remove the stats stuff, done in the core + functions. + (show_memory_vty_header) new function, print an explanatory + header. + (show_memory_vty) Add support for the new stats fields + * memory.h: Include vty.h, compile warning. + +2005-11-05 Paul Jakma + + * memory.c: (general) Add caching of specified types. + Factor of 2 improvement in malloc times for heavily reused + objects. Minor degradation in worst-case usage objects, about + 10% overhead, however those kinds of objects should NOT be + marked cacheable anyway. + (struct mstat) Add extra fields for caching. + (alloc_inc) Add tide tracking. + (alloc_dec) ditto. + (zmemory_cache_free) new function, free all cached objects + for a type. + (zmemory_cache_invalidate) new function, invalidate cache, + typically because different size requests have been made + which means caching is no longer safe. + (zmemory_cache_lookup) Quick by-type cache to short-circuit + malloc/free for objects we can easily cache. + (zmemory_cache_add) new function, potentially cache objects + which are freed for reuse. + (zmalloc) Try satisfy request from cache. + (zcalloc) ditto + (zrealloc) invalidate cache - size changes. + (zfree) Try cache object rather than freeing, via zmemory_cache_add. + (zstrdup) Invalidate cache + (show_memory_vty_header) Add support for cache headers. + (show_memory_vty) Add support for the new stats and cache + fields. + (memory_init) Initialise the mstat array's cacheable field + from memory_lists. + * memory.h: Add cacheable ENUM. + Add cacheable field to struct memory_list. + * memtypes.c: Add cacheable specifiers to all types, based on + observation of which types get good hit rates.
+ +2005-11-05 Paul Jakma + + * memory.c: (general) Add optional extra usage-stats gathering. + (struct mstat) Compile time optional fields for extra statistics. + (alloc_inc) move to top, mark for inlining. Add mstats support. + (alloc_dec) ditto. + (z{m,c,re}alloc,zfree,zstrdup) Add stats support. + (ifdef MEMORY_LOG) remove the stats stuff, done in the core + functions. + (show_memory_vty_header) new function, print an explanatory + header. + (show_memory_vty) Add support for the new stats fields + * memory.h: Include vty.h, compile warning. + 2005-11-05 Paul Jakma * routemap.c: (vty_show_route_map_entry) call action is diff --git a/lib/memory.c b/lib/memory.c --- a/lib/memory.c +++ b/lib/memory.c @@ -25,10 +25,21 @@ #include "log.h" #include "memory.h" -static void alloc_inc (int); -static void alloc_dec (int); -static void log_memstats(int log_priority); +/* some debug and probably performance debilitating compile options.. */ +#define MTYPE_EXTRA_STATS 1 +/* Tide optimisation (if it actually is an optimisation) */ +#define MTYPE_TRACK_TIDES 0 +/* free() debugging: poison on free if possible and verify poison on realloc, + * check for double-free's. + * This will use a lot of extra RAM.
+ */ +#define MTYPE_POISON 1 +/* Redzone tracking (where possible - size must stay same for object) */ +#define MTYPE_REDZONE 1 +static void log_memstats(int log_priority); +static const int redzone_marker = 0xf0f0f0f0; + static struct message mstr [] = { { MTYPE_THREAD, "thread" }, @@ -54,19 +65,369 @@ zerror (const char *fname, int type, siz abort(); } +/* most objects for a type we can try cache */ +/* If poisoning, cache lots of them so we get to verify lots of poison fill */ +#if defined(MTYPE_POISON) && (MTYPE_POISON > 0) +#define MTYPE_CACHE_NUM_SLOTS 50 +#else +/* Normally we only want a very few cache slots, as these are statically + * allocated + */ +#define MTYPE_CACHE_NUM_SLOTS 3 +#endif /* MTYPE_POISON */ + +/* Allocations / frees can be thought of as coming in 'tides', with a 'flow' + * of allocations followed by an 'ebb' of frees. We're really only interested + * in caching tides whose length is not much greater than the number of + * cache slots (ie, allocation patterns we can get a reasonably good hit + * rate on). Caching (ie not freeing) the first $FEW frees of a very long + * "tidal pattern" isn't going to do much, and at worst would just stuff + * things up for the underlying libc allocator (eg, fragmentation). + * + * Hence we track: + * flow: the current length and direction of flow or ebb + * + * And we only cache allocations while the ebb or flow is <= an + * arbitrary bound on the tide. Ie, we only cache objects whose + * allocation pattern has recently tended to be a roughly symmetrical + * series of allocs/frees, allowed to skew only within this bound. + */ +#define MTYPE_CACHE_TIDE (MTYPE_CACHE_NUM_SLOTS * 16) + +/* absolute value of the signed flow value */ +#define MTYPE_CACHE_FLOW_ABS(T) \ + ((mstat[(T)].flow >= 0) ? (mstat[(T)].flow) \ + : (-mstat[(T)].flow)) + +/* Is cache valid? 
*/ +#define CACHE_IS_INVALID(T) (mstat[(T)].cache_used == -1) + +static struct +{ + unsigned long alloc; + size_t cached_size; /* size of objects cached */ +#if (MTYPE_TRACK_TIDES > 0) + int flow; /* the current direction and length of the tide */ +#endif + void *cache_slot[MTYPE_CACHE_NUM_SLOTS]; + int cache_used; /* -1 means never ever cache again, see zrealloc() */ + enum mtype_cacheable cacheable; +#if (MTYPE_EXTRA_STATS > 0) + unsigned long st_cache_hit; + unsigned long st_cache_invalidated; + unsigned long st_cache_revalidated; + unsigned long st_cache_add; + unsigned long st_malloc; + unsigned long st_calloc; + unsigned long st_realloc; + unsigned long st_strdup; + unsigned long st_free; +#endif /* MTYPE_EXTRA_STATS */ +} mstat [MTYPE_MAX]; + +/* Increment allocation counter. */ +static inline void +alloc_inc (int type) +{ + mstat[type].alloc++; + +#if (MTYPE_TRACK_TIDES > 0) + if (mstat[type].flow < 0) + mstat[type].flow = 1; + else + mstat[type].flow++; +#endif /* MTYPE_TRACK_TIDES */ +} + +/* Decrement allocation counter. */ +static inline void +alloc_dec (int type) +{ + mstat[type].alloc--; + +#if (MTYPE_TRACK_TIDES > 0) + if (mstat[type].flow > 0) + mstat[type].flow = -1; + else + mstat[type].flow--; +#endif /* MTYPE_TRACK_TIDES */ +} + +/* free the cache, eg because it was invalidated, or the tide is too + * long + */ +static void +zmemory_cache_free (int type) +{ + while (mstat[type].cache_used > 0) + { + mstat[type].cache_used--; + free (mstat[type].cache_slot[mstat[type].cache_used]); + } +} + +/* invalidate the cache. Free all entries. 
Reset cached_size */ +static void +zmemory_cache_invalidate (int type) +{ + mstat[type].cached_size = 0; + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_cache_invalidated++; +#endif /* MTYPE_EXTRA_STATS */ + + zmemory_cache_free (type); + + mstat[type].cache_used = -1; +} + +/* helper to fill pointed to buffer + * only for use by zpoison and zpoison_verify + */ +static void +zpoison_fill (unsigned int *p, size_t size, unsigned int fill) +{ + unsigned int i; + + for (i = 0; i < (size / sizeof (fill)); i++) + memcpy ((p + i), &fill, sizeof (fill)); + + if (size % sizeof (fill)) + memcpy ((p + i), &fill, size % sizeof (fill)); +} + +static const unsigned int poison = 0x00000badU; +static const unsigned int antidote = 0xdeadbeefU; + +/* poison memory */ +static void +zpoison (int type, void *ptr) +{ + /* only if cache is valid can we have a size */ + if (CACHE_IS_INVALID (type)) + return; + + /* if cache is valid, and we're poisoning freed object, we surely must + * have a size cached.. + */ + assert (mstat[type].cached_size > 0); + + zpoison_fill (ptr, mstat[type].cached_size, poison); + + return; +} + +static inline void * +zpoison_verify (int type, unsigned int *ptr) +{ + unsigned int i; + + assert (!CACHE_IS_INVALID (type) && mstat[type].cached_size > 0); + + for (i = 0; i < (mstat[type].cached_size / sizeof (poison)); i++) + assert (*(ptr + i) == poison); + + zpoison_fill (ptr, mstat[type].cached_size, antidote); + + return ptr; +} + +/* Lookup cache entry for (type,size) This is a simple, low overhead + * cache to mitigate costs of repeated malloc(x)/free by higher level + * code which existing malloc implementations dont seem to deal well + * with. + * + * As we have a memory type parameter, information not available to a libc, + * and we already maintain stats per mtype, we can implement a low-overhead + * cache to short-circuit repetitive malloc(x)/free from having to go into + * system malloc()/free(). 
+ * + * We cant handle the size changing, if we detect different size requests + * of memory for a type, the cache for that type is cleared and invalidated + * and will remain invalid until allocations for the type return to 0. + * + * Note that the most important function here is to: + * - detect size changing + * - keep cache invalid while different size allocations are outstanding + * + * For we use the size parameter to do things other than just cache types + * (overflow redzones) + */ +static void * +zmemory_cache_lookup (int type, size_t size) +{ + /* Caching invalid for this type, if all objects have been returned, + * we can enable caching again + */ + if (CACHE_IS_INVALID (type)) + { + /* If all outstanding allocations are returned, cache be can made + * valid again, otherwise left it is left as invalid. + * + * the still-invalid case is put first as all x86 CPUs seem to + * consider the first outcome of a branch as the most likely for + * branch prediction purposes.. + */ + if (mstat[type].alloc > 0) + return NULL; + else + { +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_cache_revalidated++; +#endif /* MTYPE_EXTRA_STATS */ + + mstat[type].cache_used = 0; + } + } + + /* cache must be valid at this point. + * Three possibilities: + * - it's the first alloc (possibly after revalidation): + * size will be zero, record the size. 
+ * - the size is the same: + * see if we can satisfy from cache + * - the size is different: + * invalidate the cache + */ + + /* record size for now */ + if (mstat[type].cached_size == 0) + { + mstat[type].cached_size = size; + return NULL; + } + + /* cache_used && cached_size must both be >= here */ + if (size == mstat[type].cached_size) + { + /* Not a cacheable type, or nothing cached, + * but we've done our job of tracking size + */ + if (mstat[type].cacheable != MTYPE_CACHE + || mstat[type].cache_used == 0) + return NULL; + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_cache_hit++; +#endif /* MTYPE_EXTRA_STATS */ + + mstat[type].cache_used--; + +#define MTYPE_CACHED_SLOT(T) (mstat[(T)].cache_slot[mstat[(T)].cache_used]) + if (MTYPE_POISON) + return zpoison_verify (type, MTYPE_CACHED_SLOT (type)); + else + return MTYPE_CACHED_SLOT (type); + } + else + { + /* size doesnt match, invalidate cache which will + * mark cache as unusable for now + */ + zmemory_cache_invalidate (type); + } + return NULL; +} + +/* return 0 or 1 to signify whether memory was cached. + * 0 - not added to cache + * 1 - added to cache + * + * caller is left to free (or not) as appropriate. + */ +static inline int +zmemory_cache_add (int type, void *p) +{ + int i; + /* caching invalid for this type */ + if (mstat[type].cacheable != MTYPE_CACHE + || CACHE_IS_INVALID (type)) + return 0; + + /* Double free check */ + if (MTYPE_POISON) + for (i = 0; i < mstat[type].cache_used; i++) + assert (mstat[type].cache_slot[i] != p); + +#if (MTYPE_TRACK_TIDES > 0) + /* Tide check: last tide and the current flow of the tide should be + * less than MTYPE_CACHE_TIDE. An object with very long 'tides' isn't + * worth caching. 
+ */ + if (MTYPE_CACHE_FLOW_ABS(type) > MTYPE_CACHE_TIDE) + { + zmemory_cache_free (type); + return 0; + } +#endif /* MTYPE_TRACK_TIDES */ + + if (mstat[type].cache_used < MTYPE_CACHE_NUM_SLOTS) + { +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_cache_add++; +#endif /* MTYPE_EXTRA_STATS */ + + mstat[type].cache_slot[mstat[type].cache_used] = p; + mstat[type].cache_used++; + return 1; + } + + return 0; +} + +/* Round up given size to where redzone would start, naturally aligned */ +#define REDZONE_ROUNDUP(S) \ + (1 + (((S) - 1) | (sizeof (redzone_marker) - 1))) +#define MTYPE_SIZE_WITH_REDZONE(S) \ + (REDZONE_ROUNDUP(S) + sizeof (redzone_marker)) + +static void +zredzone_add (char *p, size_t size) +{ + *((int *)(p + REDZONE_ROUNDUP(size))) = redzone_marker; +} + +static void +zredzone_verify (int type, char *p) +{ + size_t size = mstat[type].cached_size; + + if (CACHE_IS_INVALID (type)) + return; + + if (size > 0) + assert (*((int *)(p + REDZONE_ROUNDUP(size))) == redzone_marker); + else + assert (mstat[type].alloc == 0); +} + /* Memory allocation. 
*/ void * zmalloc (int type, size_t size) { void *memory; - - memory = malloc (size); + + /* try the cache */ + if ( (memory = zmemory_cache_lookup (type, size)) == NULL) + { + if (MTYPE_REDZONE) + memory = malloc (REDZONE_ROUNDUP(size) + sizeof (redzone_marker)); + else + memory = malloc (size); + } if (memory == NULL) zerror ("malloc", type, size); - + + if (MTYPE_REDZONE) + zredzone_add (memory, size); + alloc_inc (type); - + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_malloc++; +#endif /* MTYPE_EXTRA_STATS */ + return memory; } @@ -76,11 +437,26 @@ zcalloc (int type, size_t size) { void *memory; - memory = calloc (1, size); + if ( (memory = zmemory_cache_lookup (type, size)) != NULL) + memset (memory, 0, size); + else + { + if (MTYPE_REDZONE) + memory = calloc (1, REDZONE_ROUNDUP(size) + sizeof (redzone_marker)); + else + memory = calloc (1, size); + } if (memory == NULL) zerror ("calloc", type, size); + if (MTYPE_REDZONE) + zredzone_add (memory, size); + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_calloc++; +#endif /* MTYPE_EXTRA_STATS */ + alloc_inc (type); return memory; @@ -91,10 +467,25 @@ void * zrealloc (int type, void *ptr, size_t size) { void *memory; - + + if (ptr == NULL) + return zmalloc (type, size); + + if (MTYPE_REDZONE) + zredzone_verify (type, ptr); + + /* invalidate cache if not already invalid */ + if (mstat[type].cache_used >= 0) + zmemory_cache_invalidate (type); + memory = realloc (ptr, size); if (memory == NULL) zerror ("realloc", type, size); + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_realloc++; +#endif /* MTYPE_EXTRA_STATS */ + return memory; } @@ -102,8 +493,21 @@ zrealloc (int type, void *ptr, size_t si void zfree (int type, void *ptr) { + if (MTYPE_POISON) + zpoison (type, ptr); + + if (MTYPE_REDZONE) + zredzone_verify (type, ptr); + + /* try add to cache, free if it wasnt cached */ + if (zmemory_cache_add (type, ptr) == 0) + free(ptr); + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_free++; +#endif /* MTYPE_EXTRA_STATS */ + 
alloc_dec (type); - free (ptr); } /* String duplication. */ @@ -111,28 +515,25 @@ char * zstrdup (int type, const char *str) { void *dup; - + dup = strdup (str); if (dup == NULL) zerror ("strdup", type, strlen (str)); + + /* invalidate cache if not already invalid, we have no idea of size */ + if (mstat[type].cache_used >= 0) + zmemory_cache_invalidate (type); + +#if (MTYPE_EXTRA_STATS > 0) + mstat[type].st_strdup++; +#endif /* MTYPE_EXTRA_STATS */ + alloc_inc (type); + return dup; } #ifdef MEMORY_LOG -static struct -{ - const char *name; - unsigned long alloc; - unsigned long t_malloc; - unsigned long c_malloc; - unsigned long t_calloc; - unsigned long c_calloc; - unsigned long t_realloc; - unsigned long t_free; - unsigned long c_strdup; -} mstat [MTYPE_MAX]; - static void mtype_log (char *func, void *memory, const char *file, int line, int type) { @@ -144,9 +545,6 @@ mtype_zmalloc (const char *file, int lin { void *memory; - mstat[type].c_malloc++; - mstat[type].t_malloc++; - memory = zmalloc (type, size); mtype_log ("zmalloc", memory, file, line, type); @@ -158,9 +556,6 @@ mtype_zcalloc (const char *file, int lin { void *memory; - mstat[type].c_calloc++; - mstat[type].t_calloc++; - memory = zcalloc (type, size); mtype_log ("xcalloc", memory, file, line, type); @@ -172,9 +567,6 @@ mtype_zrealloc (const char *file, int li { void *memory; - /* Realloc need before allocated pointer. 
*/ - mstat[type].t_realloc++; - memory = zrealloc (type, ptr, size); mtype_log ("xrealloc", memory, file, line, type); @@ -186,10 +578,7 @@ mtype_zrealloc (const char *file, int li void mtype_zfree (const char *file, int line, int type, void *ptr) { - mstat[type].t_free++; - mtype_log ("xfree", ptr, file, line, type); - zfree (type, ptr); } @@ -198,35 +587,13 @@ mtype_zstrdup (const char *file, int lin { char *memory; - mstat[type].c_strdup++; - memory = zstrdup (type, str); mtype_log ("xstrdup", memory, file, line, type); return memory; } -#else -static struct -{ - char *name; - unsigned long alloc; -} mstat [MTYPE_MAX]; #endif /* MTPYE_LOG */ - -/* Increment allocation counter. */ -static void -alloc_inc (int type) -{ - mstat[type].alloc++; -} - -/* Decrement allocation counter. */ -static void -alloc_dec (int type) -{ - mstat[type].alloc--; -} /* Looking up memory status from vty interface. */ #include "vector.h" @@ -255,12 +622,59 @@ show_separator(struct vty *vty) vty_out (vty, "-----------------------------\r\n"); } +static void +show_memory_vty_header (struct vty *vty) +{ + vty_out (vty, "%12s\t%s%s", "Cached:", + "Objects cached, -1 for invalidated cache", + VTY_NEWLINE); + vty_out (vty, "%12s\t%s%s", "Size Cached:", + "Size of objects in bytes", VTY_NEWLINE); + if (MTYPE_TRACK_TIDES > 0) + vty_out (vty, "%12s\t%s%s", "flow:", + "Length of current flow/ebb (negative for ebb/free's)", + VTY_NEWLINE); + + if (MTYPE_EXTRA_STATS > 0) + { + vty_out (vty, "%12s\t%s%s", "cache hit:", + "Requests satisfied from cache", + VTY_NEWLINE); + vty_out (vty, "%12s\t%s%s", "cache add:", + "Object added to cache rather than freed", + VTY_NEWLINE); + vty_out (vty, "%12s\t%s%s", "inval:", + "Cache invalidated due to request size mismatch", + VTY_NEWLINE); + vty_out (vty, "%12s\t%s%s", "reval:", + "Invalid cache revalidated and made useable again", + VTY_NEWLINE); + vty_out (vty, "%12s\t%s%s", "diff:", + "Discrepancy between (allocations - free) and 'allocated'", + 
VTY_NEWLINE); + } + + vty_out (vty, "Cache slots: %3d, Poisoning:%senabled, " + "Redzone:%senabled%s", + MTYPE_CACHE_NUM_SLOTS, + (MTYPE_POISON > 0) ? " " : " not ", + (MTYPE_REDZONE > 0) ? " " : " not ", + VTY_NEWLINE); + + vty_out (vty, "%s%-28s | %10s | %10s | %11s | %7s%s", + VTY_NEWLINE, + "Memory Type", "Allocated", "Cached", "Size Cached", "Caching", + VTY_NEWLINE); +} + static int show_memory_vty (struct vty *vty, struct memory_list *list) { struct memory_list *m; int needsep = 0; + show_memory_vty_header (vty); + for (m = list; m->index >= 0; m++) if (m->index == 0) { @@ -270,11 +684,72 @@ show_memory_vty (struct vty *vty, struct needsep = 0; } } - else if (mstat[m->index].alloc) + else if (mstat[m->index].alloc +#if (MTYPE_EXTRA_STATS > 0) + || mstat[m->index].st_strdup + || mstat[m->index].st_calloc + || mstat[m->index].st_malloc + || mstat[m->index].st_free +#endif + ) { - vty_out (vty, "%-30s: %10ld\r\n", m->format, mstat[m->index].alloc); - needsep = 1; + vty_out (vty, "%-28s | %10lu | %10d | %11lu | %7s%s", + m->format, + mstat[m->index].alloc, + mstat[m->index].cache_used, + mstat[m->index].cached_size, + (mstat[m->index].cacheable == MTYPE_CACHE + && !CACHE_IS_INVALID(m->index)) ? 
"yes" : "no", + VTY_NEWLINE); +#if (MTYPE_EXTRA_STATS > 0) + { + long int diff = mstat[m->index].alloc + - (mstat[m->index].st_strdup + + mstat[m->index].st_calloc + + mstat[m->index].st_malloc + - mstat[m->index].st_free); + + vty_out (vty, "%28s | %10lu | %10lu | %11lu |%s", + "malloc | calloc | realloc", + mstat[m->index].st_malloc, + mstat[m->index].st_calloc, + mstat[m->index].st_realloc, + VTY_NEWLINE); + vty_out (vty, "%-28s | %10lu | %10lu | %11ld |%s", + " strdup | free | diff", + mstat[m->index].st_strdup, + mstat[m->index].st_free, + diff, + VTY_NEWLINE); + vty_out (vty, "%-28s | %10lu | %10lu |%s", + "cache hit | add |", + mstat[m->index].st_cache_hit, + mstat[m->index].st_cache_add, + VTY_NEWLINE); + vty_out (vty, "%-28s | %10lu | %10lu |%s", + " inval | reval |", + mstat[m->index].st_cache_invalidated, + mstat[m->index].st_cache_revalidated, + VTY_NEWLINE); + } +#endif /* MTYPE_EXTRA_STATS */ + +#if (MTYPE_TRACK_TIDES > 0) + vty_out (vty, "%-28s | %10d |%s", + " flow |", + mstat[m->index].flow, + VTY_NEWLINE); +#endif /* MTYPE_TRACK_TIDES */ + + /* If we don't have the extra stats output, every objects fits on one + * line and we don't need the extra newline to help distinguish + */ + if (MTYPE_TRACK_TIDES > 0 || MTYPE_EXTRA_STATS > 0) + vty_out (vty, "%s", VTY_NEWLINE); + + needsep = 1; } + return needsep; } @@ -395,6 +870,19 @@ DEFUN (show_memory_isis, void memory_init (void) { + struct mlist *ml; + struct memory_list *m; + + for (ml = mlists; ml->list; ml++) + { + for (m = ml->list; m->index >= 0; m++) + if (m->index > 0) + mstat[m->index].cacheable = m->cacheable; + } + + /* the 0th cache is special, for extremely lazy users, must be invalid */ + mstat[0].cache_used = -1; + install_element (VIEW_NODE, &show_memory_cmd); install_element (VIEW_NODE, &show_memory_all_cmd); install_element (VIEW_NODE, &show_memory_lib_cmd); diff --git a/lib/memory.h b/lib/memory.h --- a/lib/memory.h +++ b/lib/memory.h @@ -20,12 +20,16 @@ Software Foundation, Inc., 59 
Temple Pla #ifndef _ZEBRA_MEMORY_H #define _ZEBRA_MEMORY_H +#include "vty.h" + +enum mtype_cacheable { MTYPE_NOCACHE = 0, MTYPE_CACHE = 1 }; /* For pretty printing of memory allocate information. */ struct memory_list { int index; const char *format; + enum mtype_cacheable cacheable; }; struct mlist { diff --git a/lib/memtypes.c b/lib/memtypes.c --- a/lib/memtypes.c +++ b/lib/memtypes.c @@ -14,239 +14,239 @@ struct memory_list memory_list_lib[] = { - { MTYPE_TMP, "Temporary memory" }, - { MTYPE_STRVEC, "String vector" }, - { MTYPE_VECTOR, "Vector" }, - { MTYPE_VECTOR_INDEX, "Vector index" }, - { MTYPE_LINK_LIST, "Link List" }, - { MTYPE_LINK_NODE, "Link Node" }, - { MTYPE_THREAD, "Thread" }, - { MTYPE_THREAD_MASTER, "Thread master" }, - { MTYPE_THREAD_STATS, "Thread stats" }, - { MTYPE_THREAD_FUNCNAME, "Thread function name" }, - { MTYPE_VTY, "VTY" }, - { MTYPE_VTY_OUT_BUF, "VTY output buffer" }, - { MTYPE_VTY_HIST, "VTY history" }, - { MTYPE_IF, "Interface" }, - { MTYPE_CONNECTED, "Connected" }, - { MTYPE_CONNECTED_LABEL, "Connected interface label" }, - { MTYPE_BUFFER, "Buffer" }, - { MTYPE_BUFFER_DATA, "Buffer data" }, - { MTYPE_STREAM, "Stream" }, - { MTYPE_STREAM_DATA, "Stream data" }, - { MTYPE_STREAM_FIFO, "Stream FIFO" }, - { MTYPE_PREFIX, "Prefix" }, - { MTYPE_PREFIX_IPV4, "Prefix IPv4" }, - { MTYPE_PREFIX_IPV6, "Prefix IPv6" }, - { MTYPE_HASH, "Hash" }, - { MTYPE_HASH_BACKET, "Hash Bucket" }, - { MTYPE_HASH_INDEX, "Hash Index" }, - { MTYPE_ROUTE_TABLE, "Route table" }, - { MTYPE_ROUTE_NODE, "Route node" }, - { MTYPE_DISTRIBUTE, "Distribute list" }, - { MTYPE_DISTRIBUTE_IFNAME, "Dist-list ifname" }, - { MTYPE_ACCESS_LIST, "Access List" }, - { MTYPE_ACCESS_LIST_STR, "Access List Str" }, - { MTYPE_ACCESS_FILTER, "Access Filter" }, - { MTYPE_PREFIX_LIST, "Prefix List" }, - { MTYPE_PREFIX_LIST_ENTRY, "Prefix List Entry" }, - { MTYPE_PREFIX_LIST_STR, "Prefix List Str" }, - { MTYPE_ROUTE_MAP, "Route map" }, - { MTYPE_ROUTE_MAP_NAME, "Route map name" }, - { 
MTYPE_ROUTE_MAP_INDEX, "Route map index" }, - { MTYPE_ROUTE_MAP_RULE, "Route map rule" }, - { MTYPE_ROUTE_MAP_RULE_STR, "Route map rule str" }, - { MTYPE_ROUTE_MAP_COMPILED, "Route map compiled" }, - { MTYPE_DESC, "Command desc" }, - { MTYPE_KEY, "Key" }, - { MTYPE_KEYCHAIN, "Key chain" }, - { MTYPE_IF_RMAP, "Interface route map" }, - { MTYPE_IF_RMAP_NAME, "I.f. route map name", }, - { MTYPE_SOCKUNION, "Socket union" }, - { MTYPE_PRIVS, "Privilege information" }, - { MTYPE_ZLOG, "Logging" }, - { MTYPE_ZCLIENT, "Zclient" }, - { MTYPE_WORK_QUEUE, "Work queue" }, - { MTYPE_WORK_QUEUE_ITEM, "Work queue item" }, - { MTYPE_WORK_QUEUE_NAME, "Work queue name string" }, - { MTYPE_PQUEUE, "Priority queue" }, - { MTYPE_PQUEUE_DATA, "Priority queue data" }, - { MTYPE_HOST, "Host config" }, + { MTYPE_TMP, "Temporary memory", MTYPE_NOCACHE, }, + { MTYPE_STRVEC, "String vector", MTYPE_NOCACHE, }, + { MTYPE_VECTOR, "Vector", MTYPE_CACHE, }, + { MTYPE_VECTOR_INDEX, "Vector index", MTYPE_NOCACHE }, + { MTYPE_LINK_LIST, "Link List", MTYPE_CACHE }, + { MTYPE_LINK_NODE, "Link Node", MTYPE_CACHE }, + { MTYPE_THREAD, "Thread", MTYPE_CACHE }, + { MTYPE_THREAD_MASTER, "Thread master", MTYPE_CACHE }, + { MTYPE_THREAD_STATS, "Thread stats", MTYPE_NOCACHE }, + { MTYPE_THREAD_FUNCNAME, "Thread function name", MTYPE_NOCACHE }, + { MTYPE_VTY, "VTY", MTYPE_NOCACHE }, + { MTYPE_VTY_OUT_BUF, "VTY output buffer", MTYPE_NOCACHE }, + { MTYPE_VTY_HIST, "VTY history", MTYPE_CACHE }, + { MTYPE_IF, "Interface", MTYPE_CACHE }, + { MTYPE_CONNECTED, "Connected", MTYPE_CACHE }, + { MTYPE_CONNECTED_LABEL, "Connected label", MTYPE_NOCACHE }, + { MTYPE_BUFFER, "Buffer", MTYPE_CACHE }, + { MTYPE_BUFFER_DATA, "Buffer data", MTYPE_CACHE }, + { MTYPE_STREAM, "Stream", MTYPE_CACHE }, + { MTYPE_STREAM_DATA, "Stream data", MTYPE_NOCACHE }, + { MTYPE_STREAM_FIFO, "Stream FIFO", MTYPE_CACHE }, + { MTYPE_PREFIX, "Prefix", MTYPE_CACHE }, + { MTYPE_PREFIX_IPV4, "Prefix IPv4", MTYPE_NOCACHE }, + { MTYPE_PREFIX_IPV6, "Prefix 
IPv6", MTYPE_NOCACHE }, + { MTYPE_HASH, "Hash", MTYPE_CACHE }, + { MTYPE_HASH_BACKET, "Hash Bucket", MTYPE_CACHE }, + { MTYPE_HASH_INDEX, "Hash Index", MTYPE_CACHE }, + { MTYPE_ROUTE_TABLE, "Route table", MTYPE_CACHE }, + { MTYPE_ROUTE_NODE, "Route node", MTYPE_CACHE }, + { MTYPE_DISTRIBUTE, "Distribute list", MTYPE_NOCACHE }, + { MTYPE_DISTRIBUTE_IFNAME, "Dist-list ifname", MTYPE_NOCACHE }, + { MTYPE_ACCESS_LIST, "Access List", MTYPE_NOCACHE }, + { MTYPE_ACCESS_LIST_STR, "Access List Str", MTYPE_NOCACHE }, + { MTYPE_ACCESS_FILTER, "Access Filter", MTYPE_NOCACHE }, + { MTYPE_PREFIX_LIST, "Prefix List", MTYPE_NOCACHE }, + { MTYPE_PREFIX_LIST_ENTRY, "Prefix List Entry", MTYPE_NOCACHE }, + { MTYPE_PREFIX_LIST_STR, "Prefix List Str", MTYPE_NOCACHE }, + { MTYPE_ROUTE_MAP, "Route map", MTYPE_NOCACHE }, + { MTYPE_ROUTE_MAP_NAME, "Route map name", MTYPE_NOCACHE }, + { MTYPE_ROUTE_MAP_INDEX, "Route map index", MTYPE_NOCACHE }, + { MTYPE_ROUTE_MAP_RULE, "Route map rule", MTYPE_NOCACHE }, + { MTYPE_ROUTE_MAP_RULE_STR, "Route map rule str", MTYPE_NOCACHE }, + { MTYPE_ROUTE_MAP_COMPILED, "Route map compiled", MTYPE_NOCACHE }, + { MTYPE_DESC, "Command desc", MTYPE_NOCACHE }, + { MTYPE_KEY, "Key", MTYPE_NOCACHE }, + { MTYPE_KEYCHAIN, "Key chain", MTYPE_NOCACHE }, + { MTYPE_IF_RMAP, "Interface route map", MTYPE_NOCACHE }, + { MTYPE_IF_RMAP_NAME, "I.f. 
route map name", MTYPE_CACHE }, + { MTYPE_SOCKUNION, "Socket union", MTYPE_CACHE }, + { MTYPE_PRIVS, "Privilege information", MTYPE_NOCACHE }, + { MTYPE_ZLOG, "Logging", MTYPE_NOCACHE }, + { MTYPE_ZCLIENT, "Zclient", MTYPE_NOCACHE }, + { MTYPE_WORK_QUEUE, "Work queue", MTYPE_NOCACHE }, + { MTYPE_WORK_QUEUE_ITEM, "Work queue item", MTYPE_CACHE }, + { MTYPE_WORK_QUEUE_NAME, "Work queue name string", MTYPE_NOCACHE }, + { MTYPE_PQUEUE, "Priority queue", MTYPE_CACHE }, + { MTYPE_PQUEUE_DATA, "Priority queue data", MTYPE_CACHE }, + { MTYPE_HOST, "Host config", MTYPE_NOCACHE }, { -1, NULL }, }; struct memory_list memory_list_zebra[] = { - { MTYPE_RTADV_PREFIX, "Router Advertisement Prefix" }, - { MTYPE_VRF, "VRF" }, - { MTYPE_VRF_NAME, "VRF name" }, - { MTYPE_NEXTHOP, "Nexthop" }, - { MTYPE_RIB, "RIB" }, - { MTYPE_RIB_QUEUE, "RIB process work queue" }, - { MTYPE_STATIC_IPV4, "Static IPv4 route" }, - { MTYPE_STATIC_IPV6, "Static IPv6 route" }, + { MTYPE_RTADV_PREFIX, "Router Advertisement Prefix", MTYPE_NOCACHE }, + { MTYPE_VRF, "VRF", MTYPE_NOCACHE }, + { MTYPE_VRF_NAME, "VRF name", MTYPE_NOCACHE }, + { MTYPE_NEXTHOP, "Nexthop", MTYPE_CACHE }, + { MTYPE_RIB, "RIB", MTYPE_CACHE }, + { MTYPE_RIB_QUEUE, "RIB process work queue", MTYPE_CACHE }, + { MTYPE_STATIC_IPV4, "Static IPv4 route", MTYPE_NOCACHE }, + { MTYPE_STATIC_IPV6, "Static IPv6 route", MTYPE_NOCACHE }, { -1, NULL }, }; struct memory_list memory_list_bgp[] = { - { MTYPE_BGP, "BGP instance" }, - { MTYPE_BGP_PEER, "BGP peer" }, - { MTYPE_BGP_PEER_HOST, "BGP peer hostname" }, - { MTYPE_PEER_GROUP, "Peer group" }, - { MTYPE_PEER_DESC, "Peer description" }, - { MTYPE_ATTR, "BGP attribute" }, - { MTYPE_AS_PATH, "BGP aspath" }, - { MTYPE_AS_SEG, "BGP aspath seg" }, - { MTYPE_AS_SEG_DATA, "BGP aspath segment data" }, - { MTYPE_AS_STR, "BGP aspath str" }, - { 0, NULL }, - { MTYPE_BGP_TABLE, "BGP table" }, - { MTYPE_BGP_NODE, "BGP node" }, - { MTYPE_BGP_ROUTE, "BGP route" }, - { MTYPE_BGP_STATIC, "BGP static" }, - { 
MTYPE_BGP_ADVERTISE_ATTR, "BGP adv attr" }, - { MTYPE_BGP_ADVERTISE, "BGP adv" }, - { MTYPE_BGP_ADJ_IN, "BGP adj in" }, - { MTYPE_BGP_ADJ_OUT, "BGP adj out" }, - { 0, NULL }, - { MTYPE_AS_LIST, "BGP AS list" }, - { MTYPE_AS_FILTER, "BGP AS filter" }, - { MTYPE_AS_FILTER_STR, "BGP AS filter str" }, - { 0, NULL }, - { MTYPE_COMMUNITY, "community" }, - { MTYPE_COMMUNITY_VAL, "community val" }, - { MTYPE_COMMUNITY_STR, "community str" }, - { 0, NULL }, - { MTYPE_ECOMMUNITY, "extcommunity" }, - { MTYPE_ECOMMUNITY_VAL, "extcommunity val" }, - { MTYPE_ECOMMUNITY_STR, "extcommunity str" }, - { 0, NULL }, - { MTYPE_COMMUNITY_LIST, "community-list" }, - { MTYPE_COMMUNITY_LIST_NAME, "community-list name" }, - { MTYPE_COMMUNITY_LIST_ENTRY, "community-list entry" }, - { MTYPE_COMMUNITY_LIST_CONFIG, "community-list config" }, - { MTYPE_COMMUNITY_LIST_HANDLER, "community-list handler" }, - { 0, NULL }, - { MTYPE_CLUSTER, "Cluster list" }, - { MTYPE_CLUSTER_VAL, "Cluster list val" }, - { 0, NULL }, - { MTYPE_BGP_PROCESS_QUEUE, "BGP Process queue" }, - { MTYPE_BGP_CLEAR_NODE_QUEUE, "BGP node clear queue" }, - { 0, NULL }, - { MTYPE_TRANSIT, "BGP transit attr" }, - { MTYPE_TRANSIT_VAL, "BGP transit val" }, - { 0, NULL }, - { MTYPE_BGP_DISTANCE, "BGP distance" }, - { MTYPE_BGP_NEXTHOP_CACHE, "BGP nexthop" }, - { MTYPE_BGP_CONFED_LIST, "BGP confed list" }, - { MTYPE_PEER_UPDATE_SOURCE, "BGP peer update interface" }, - { MTYPE_BGP_DAMP_INFO, "Dampening info" }, - { MTYPE_BGP_DAMP_ARRAY, "BGP Dampening array" }, - { MTYPE_BGP_REGEXP, "BGP regexp" }, - { MTYPE_BGP_AGGREGATE, "BGP aggregate" }, + { MTYPE_BGP, "BGP instance", MTYPE_NOCACHE }, + { MTYPE_BGP_PEER, "BGP peer", MTYPE_CACHE }, + { MTYPE_BGP_PEER_HOST, "BGP peer hostname", MTYPE_NOCACHE }, + { MTYPE_PEER_GROUP, "Peer group", MTYPE_CACHE }, + { MTYPE_PEER_DESC, "Peer description", MTYPE_NOCACHE }, + { MTYPE_ATTR, "BGP attribute", MTYPE_CACHE }, + { MTYPE_AS_PATH, "BGP aspath", MTYPE_CACHE }, + { MTYPE_AS_SEG, "BGP aspath seg", 
MTYPE_CACHE }, + { MTYPE_AS_SEG_DATA, "BGP aspath segment data", MTYPE_NOCACHE }, + { MTYPE_AS_STR, "BGP aspath str", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_BGP_TABLE, "BGP table", MTYPE_CACHE }, + { MTYPE_BGP_NODE, "BGP node", MTYPE_CACHE }, + { MTYPE_BGP_ROUTE, "BGP route", MTYPE_CACHE }, + { MTYPE_BGP_STATIC, "BGP static", MTYPE_NOCACHE }, + { MTYPE_BGP_ADVERTISE_ATTR, "BGP adv attr", MTYPE_NOCACHE }, + { MTYPE_BGP_ADVERTISE, "BGP adv", MTYPE_NOCACHE }, + { MTYPE_BGP_ADJ_IN, "BGP adj in", MTYPE_CACHE }, + { MTYPE_BGP_ADJ_OUT, "BGP adj out", MTYPE_CACHE }, + { 0, NULL }, + { MTYPE_AS_LIST, "BGP AS list", MTYPE_NOCACHE }, + { MTYPE_AS_FILTER, "BGP AS filter", MTYPE_NOCACHE }, + { MTYPE_AS_FILTER_STR, "BGP AS filter str", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_COMMUNITY, "community", MTYPE_CACHE }, + { MTYPE_COMMUNITY_VAL, "community val", MTYPE_NOCACHE }, + { MTYPE_COMMUNITY_STR, "community str", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_ECOMMUNITY, "extcommunity", MTYPE_CACHE }, + { MTYPE_ECOMMUNITY_VAL, "extcommunity val", MTYPE_NOCACHE }, + { MTYPE_ECOMMUNITY_STR, "extcommunity str", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_COMMUNITY_LIST, "community-list", MTYPE_NOCACHE }, + { MTYPE_COMMUNITY_LIST_NAME, "community-list name", MTYPE_NOCACHE }, + { MTYPE_COMMUNITY_LIST_ENTRY, "community-list entry", MTYPE_NOCACHE }, + { MTYPE_COMMUNITY_LIST_CONFIG, "community-list config", MTYPE_NOCACHE }, + { MTYPE_COMMUNITY_LIST_HANDLER, "community-list handler", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_CLUSTER, "Cluster list", MTYPE_NOCACHE }, + { MTYPE_CLUSTER_VAL, "Cluster list val", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_BGP_PROCESS_QUEUE, "BGP Process queue", MTYPE_CACHE }, + { 0, NULL }, + { MTYPE_TRANSIT, "BGP transit attr", MTYPE_NOCACHE }, + { MTYPE_TRANSIT_VAL, "BGP transit val", MTYPE_NOCACHE }, + { 0, NULL }, + { MTYPE_BGP_DISTANCE, "BGP distance", MTYPE_NOCACHE }, + { MTYPE_BGP_NEXTHOP_CACHE, "BGP nexthop", MTYPE_CACHE }, + { MTYPE_BGP_CONFED_LIST, "BGP 
confed list", MTYPE_NOCACHE }, + { MTYPE_PEER_UPDATE_SOURCE, "BGP peer update interface", MTYPE_NOCACHE }, + { MTYPE_BGP_DAMP_INFO, "Dampening info", MTYPE_NOCACHE }, + { MTYPE_BGP_DAMP_ARRAY, "BGP Dampening array", MTYPE_NOCACHE }, + { MTYPE_BGP_REGEXP, "BGP regexp", MTYPE_NOCACHE }, + { MTYPE_BGP_AGGREGATE, "BGP aggregate", MTYPE_NOCACHE }, { -1, NULL } }; struct memory_list memory_list_rip[] = { - { MTYPE_RIP, "RIP structure" }, - { MTYPE_RIP_INFO, "RIP route info" }, - { MTYPE_RIP_INTERFACE, "RIP interface" }, - { MTYPE_RIP_PEER, "RIP peer" }, - { MTYPE_RIP_OFFSET_LIST, "RIP offset list" }, - { MTYPE_RIP_DISTANCE, "RIP distance" }, + { MTYPE_RIP, "RIP structure", MTYPE_NOCACHE }, + { MTYPE_RIP_INFO, "RIP route info", MTYPE_NOCACHE }, + { MTYPE_RIP_INTERFACE, "RIP interface", MTYPE_NOCACHE }, + { MTYPE_RIP_PEER, "RIP peer", MTYPE_NOCACHE }, + { MTYPE_RIP_OFFSET_LIST, "RIP offset list", MTYPE_NOCACHE }, + { MTYPE_RIP_DISTANCE, "RIP distance", MTYPE_NOCACHE }, { -1, NULL } }; struct memory_list memory_list_ripng[] = { - { MTYPE_RIPNG, "RIPng structure" }, - { MTYPE_RIPNG_ROUTE, "RIPng route info" }, - { MTYPE_RIPNG_AGGREGATE, "RIPng aggregate" }, - { MTYPE_RIPNG_PEER, "RIPng peer" }, - { MTYPE_RIPNG_OFFSET_LIST, "RIPng offset lst" }, - { MTYPE_RIPNG_RTE_DATA, "RIPng rte data" }, + { MTYPE_RIPNG, "RIPng structure", MTYPE_NOCACHE }, + { MTYPE_RIPNG_ROUTE, "RIPng route info", MTYPE_NOCACHE }, + { MTYPE_RIPNG_AGGREGATE, "RIPng aggregate", MTYPE_NOCACHE }, + { MTYPE_RIPNG_PEER, "RIPng peer", MTYPE_NOCACHE }, + { MTYPE_RIPNG_OFFSET_LIST, "RIPng offset lst", MTYPE_NOCACHE }, + { MTYPE_RIPNG_RTE_DATA, "RIPng rte data", MTYPE_NOCACHE }, { -1, NULL } }; struct memory_list memory_list_ospf[] = { - { MTYPE_OSPF_TOP, "OSPF top" }, - { MTYPE_OSPF_AREA, "OSPF area" }, - { MTYPE_OSPF_AREA_RANGE, "OSPF area range" }, - { MTYPE_OSPF_NETWORK, "OSPF network" }, - { MTYPE_OSPF_NEIGHBOR_STATIC,"OSPF static nbr" }, - { MTYPE_OSPF_IF, "OSPF interface" }, - { MTYPE_OSPF_NEIGHBOR, "OSPF 
neighbor" }, - { MTYPE_OSPF_ROUTE, "OSPF route" }, - { MTYPE_OSPF_TMP, "OSPF tmp mem" }, - { MTYPE_OSPF_LSA, "OSPF LSA" }, - { MTYPE_OSPF_LSA_DATA, "OSPF LSA data" }, - { MTYPE_OSPF_LSDB, "OSPF LSDB" }, - { MTYPE_OSPF_PACKET, "OSPF packet" }, - { MTYPE_OSPF_FIFO, "OSPF FIFO queue" }, - { MTYPE_OSPF_VERTEX, "OSPF vertex" }, - { MTYPE_OSPF_VERTEX_PARENT, "OSPF vertex parent", }, - { MTYPE_OSPF_NEXTHOP, "OSPF nexthop" }, - { MTYPE_OSPF_PATH, "OSPF path" }, - { MTYPE_OSPF_VL_DATA, "OSPF VL data" }, - { MTYPE_OSPF_CRYPT_KEY, "OSPF crypt key" }, - { MTYPE_OSPF_EXTERNAL_INFO, "OSPF ext. info" }, - { MTYPE_OSPF_DISTANCE, "OSPF distance" }, - { MTYPE_OSPF_IF_INFO, "OSPF if info" }, - { MTYPE_OSPF_IF_PARAMS, "OSPF if params" }, - { MTYPE_OSPF_MESSAGE, "OSPF message" }, + { MTYPE_OSPF_TOP, "OSPF top", MTYPE_NOCACHE }, + { MTYPE_OSPF_AREA, "OSPF area", MTYPE_CACHE }, + { MTYPE_OSPF_AREA_RANGE, "OSPF area range", MTYPE_NOCACHE }, + { MTYPE_OSPF_NETWORK, "OSPF network", MTYPE_CACHE }, + { MTYPE_OSPF_NEIGHBOR_STATIC,"OSPF static nbr", MTYPE_NOCACHE }, + { MTYPE_OSPF_IF, "OSPF interface", MTYPE_CACHE }, + { MTYPE_OSPF_NEIGHBOR, "OSPF neighbor", MTYPE_CACHE }, + { MTYPE_OSPF_ROUTE, "OSPF route", MTYPE_CACHE }, + { MTYPE_OSPF_TMP, "OSPF tmp mem", MTYPE_NOCACHE }, + { MTYPE_OSPF_LSA, "OSPF LSA", MTYPE_CACHE }, + { MTYPE_OSPF_LSA_DATA, "OSPF LSA data", MTYPE_NOCACHE }, + { MTYPE_OSPF_LSDB, "OSPF LSDB", MTYPE_CACHE }, + { MTYPE_OSPF_PACKET, "OSPF packet", MTYPE_CACHE }, + { MTYPE_OSPF_FIFO, "OSPF FIFO queue", MTYPE_NOCACHE }, + { MTYPE_OSPF_VERTEX, "OSPF vertex", MTYPE_CACHE }, + { MTYPE_OSPF_VERTEX_PARENT, "OSPF vertex parent", MTYPE_CACHE }, + { MTYPE_OSPF_NEXTHOP, "OSPF nexthop", MTYPE_CACHE }, + { MTYPE_OSPF_PATH, "OSPF path", MTYPE_CACHE }, + { MTYPE_OSPF_VL_DATA, "OSPF VL data", MTYPE_NOCACHE }, + { MTYPE_OSPF_CRYPT_KEY, "OSPF crypt key", MTYPE_NOCACHE }, + { MTYPE_OSPF_EXTERNAL_INFO, "OSPF ext. 
info", MTYPE_NOCACHE }, + { MTYPE_OSPF_DISTANCE, "OSPF distance", MTYPE_NOCACHE }, + { MTYPE_OSPF_IF_INFO, "OSPF if info", MTYPE_NOCACHE }, + { MTYPE_OSPF_IF_PARAMS, "OSPF if params", MTYPE_NOCACHE }, + { MTYPE_OSPF_MESSAGE, "OSPF message", MTYPE_NOCACHE }, { -1, NULL }, }; struct memory_list memory_list_ospf6[] = { - { MTYPE_OSPF6_TOP, "OSPF6 top" }, - { MTYPE_OSPF6_AREA, "OSPF6 area" }, - { MTYPE_OSPF6_IF, "OSPF6 interface" }, - { MTYPE_OSPF6_NEIGHBOR, "OSPF6 neighbor" }, - { MTYPE_OSPF6_ROUTE, "OSPF6 route" }, - { MTYPE_OSPF6_PREFIX, "OSPF6 prefix" }, - { MTYPE_OSPF6_MESSAGE, "OSPF6 message" }, - { MTYPE_OSPF6_LSA, "OSPF6 LSA" }, - { MTYPE_OSPF6_LSA_SUMMARY, "OSPF6 LSA summary" }, - { MTYPE_OSPF6_LSDB, "OSPF6 LSA database" }, - { MTYPE_OSPF6_VERTEX, "OSPF6 vertex" }, - { MTYPE_OSPF6_SPFTREE, "OSPF6 SPF tree" }, - { MTYPE_OSPF6_NEXTHOP, "OSPF6 nexthop" }, - { MTYPE_OSPF6_EXTERNAL_INFO,"OSPF6 ext. info" }, - { MTYPE_OSPF6_OTHER, "OSPF6 other" }, + { MTYPE_OSPF6_TOP, "OSPF6 top", MTYPE_NOCACHE }, + { MTYPE_OSPF6_AREA, "OSPF6 area", MTYPE_NOCACHE }, + { MTYPE_OSPF6_IF, "OSPF6 interface", MTYPE_NOCACHE }, + { MTYPE_OSPF6_NEIGHBOR, "OSPF6 neighbor", MTYPE_NOCACHE }, + { MTYPE_OSPF6_ROUTE, "OSPF6 route", MTYPE_NOCACHE }, + { MTYPE_OSPF6_PREFIX, "OSPF6 prefix", MTYPE_NOCACHE }, + { MTYPE_OSPF6_MESSAGE, "OSPF6 message", MTYPE_NOCACHE }, + { MTYPE_OSPF6_LSA, "OSPF6 LSA", MTYPE_NOCACHE }, + { MTYPE_OSPF6_LSA_SUMMARY, "OSPF6 LSA summary", MTYPE_NOCACHE }, + { MTYPE_OSPF6_LSDB, "OSPF6 LSA database", MTYPE_NOCACHE }, + { MTYPE_OSPF6_VERTEX, "OSPF6 vertex", MTYPE_NOCACHE }, + { MTYPE_OSPF6_SPFTREE, "OSPF6 SPF tree", MTYPE_NOCACHE }, + { MTYPE_OSPF6_NEXTHOP, "OSPF6 nexthop", MTYPE_NOCACHE }, + { MTYPE_OSPF6_EXTERNAL_INFO,"OSPF6 ext. 
info", MTYPE_NOCACHE }, + { MTYPE_OSPF6_OTHER, "OSPF6 other", MTYPE_NOCACHE }, { -1, NULL }, }; struct memory_list memory_list_isis[] = { - { MTYPE_ISIS, "ISIS" }, - { MTYPE_ISIS_TMP, "ISIS TMP" }, - { MTYPE_ISIS_CIRCUIT, "ISIS circuit" }, - { MTYPE_ISIS_LSP, "ISIS LSP" }, - { MTYPE_ISIS_ADJACENCY, "ISIS adjacency" }, - { MTYPE_ISIS_AREA, "ISIS area" }, - { MTYPE_ISIS_AREA_ADDR, "ISIS area address" }, - { MTYPE_ISIS_TLV, "ISIS TLV" }, - { MTYPE_ISIS_DYNHN, "ISIS dyn hostname" }, - { MTYPE_ISIS_SPFTREE, "ISIS SPFtree" }, - { MTYPE_ISIS_VERTEX, "ISIS vertex" }, - { MTYPE_ISIS_ROUTE_INFO, "ISIS route info" }, - { MTYPE_ISIS_NEXTHOP, "ISIS nexthop" }, - { MTYPE_ISIS_NEXTHOP6, "ISIS nexthop6" }, + { MTYPE_ISIS, "ISIS", MTYPE_NOCACHE }, + { MTYPE_ISIS_TMP, "ISIS TMP", MTYPE_NOCACHE }, + { MTYPE_ISIS_CIRCUIT, "ISIS circuit", MTYPE_NOCACHE }, + { MTYPE_ISIS_LSP, "ISIS LSP", MTYPE_NOCACHE }, + { MTYPE_ISIS_ADJACENCY, "ISIS adjacency", MTYPE_NOCACHE }, + { MTYPE_ISIS_AREA, "ISIS area", MTYPE_NOCACHE }, + { MTYPE_ISIS_AREA_ADDR, "ISIS area address", MTYPE_NOCACHE }, + { MTYPE_ISIS_TLV, "ISIS TLV", MTYPE_NOCACHE }, + { MTYPE_ISIS_DYNHN, "ISIS dyn hostname", MTYPE_NOCACHE }, + { MTYPE_ISIS_SPFTREE, "ISIS SPFtree", MTYPE_NOCACHE }, + { MTYPE_ISIS_VERTEX, "ISIS vertex", MTYPE_CACHE }, + { MTYPE_ISIS_ROUTE_INFO, "ISIS route info", MTYPE_NOCACHE }, + { MTYPE_ISIS_NEXTHOP, "ISIS nexthop", MTYPE_NOCACHE }, + { MTYPE_ISIS_NEXTHOP6, "ISIS nexthop6", MTYPE_NOCACHE }, { -1, NULL }, }; struct memory_list memory_list_vtysh[] = { - { MTYPE_VTYSH_CONFIG, "Vtysh configuration", }, - { MTYPE_VTYSH_CONFIG_LINE, "Vtysh configuration line" }, + { MTYPE_VTYSH_CONFIG, "Vtysh configuration", MTYPE_NOCACHE }, + { MTYPE_VTYSH_CONFIG_LINE, "Vtysh configuration line", MTYPE_NOCACHE }, { -1, NULL }, }; -struct mlist mlists[] __attribute__ ((unused)) = { +struct mlist mlists[] __attribute__ ((unused)) = +{ { memory_list_lib, "LIB" }, { memory_list_zebra, "ZEBRA" }, { memory_list_rip, "RIP" }, diff --git 
a/lib/workqueue.c b/lib/workqueue.c --- a/lib/workqueue.c +++ b/lib/workqueue.c @@ -80,7 +80,12 @@ work_queue_new (struct thread_master *m, listnode_add (&work_queues, new); new->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; - + + /* Default values, can be overridden by caller */ + new->spec.delay = WORK_QUEUE_DEFAULT_DELAY; + new->spec.hold = WORK_QUEUE_DEFAULT_HOLD; + new->spec.flood = WORK_QUEUE_DEFAULT_FLOOD; + return new; } @@ -128,7 +133,7 @@ work_queue_add (struct work_queue *wq, v item->data = data; listnode_add (wq->items, item); - work_queue_schedule (wq, wq->spec.hold); + work_queue_schedule (wq, wq->spec.delay); return; } @@ -142,7 +147,7 @@ work_queue_item_remove (struct work_queu /* call private data deletion callback if needed */ if (wq->spec.del_item_data) - wq->spec.del_item_data (item->data); + wq->spec.del_item_data (wq, item->data); list_delete_node (wq->items, ln); work_queue_item_free (item); @@ -167,12 +172,12 @@ DEFUN(show_work_queues, struct work_queue *wq; vty_out (vty, - "%c %8s %11s %8s %21s%s", - ' ', "List","(ms) ","Q. Runs","Cycle Counts ", + "%c%c %8s %11s %8s %21s%s", + ' ', ' ', "List","(ms) ","Q. Runs","Cycle Counts ", VTY_NEWLINE); vty_out (vty, - "%c %8s %5s %5s %8s %7s %6s %6s %s%s", - ' ', + "%c%c %8s %5s %5s %8s %7s %6s %6s %s%s", + 'P', 'F', "Items", "Delay","Hold", "Total", @@ -182,8 +187,9 @@ DEFUN(show_work_queues, for (ALL_LIST_ELEMENTS_RO ((&work_queues), node, wq)) { - vty_out (vty,"%c %8d %5d %5d %8ld %7d %6d %6u %s%s", + vty_out (vty,"%c%c %8d %5d %5d %8ld %7d %6d %6u %s%s", (wq->flags == WQ_PLUGGED ? 'P' : ' '), + (wq->runs_since_clear >= wq->spec.flood ? 
'F' : ' '), listcount (wq->items), wq->spec.delay, wq->spec.hold, wq->runs, @@ -220,7 +226,7 @@ work_queue_unplug (struct work_queue *wq wq->flags = WQ_UNPLUGGED; /* if thread isnt already waiting, add one */ - work_queue_schedule (wq, wq->spec.hold); + work_queue_schedule (wq, wq->spec.delay); } /* timer thread to process a work queue @@ -278,7 +284,7 @@ work_queue_run (struct thread *thread) /* run and take care of items that want to be retried immediately */ do { - ret = wq->spec.workfunc (item->data); + ret = wq->spec.workfunc (wq, item->data); item->ran++; } while ((ret == WQ_RETRY_NOW) @@ -364,9 +370,20 @@ stats: /* Is the queue done yet? If it is, call the completion callback. */ if (listcount (wq->items) > 0) - work_queue_schedule (wq, wq->spec.delay); - else if (wq->spec.completion_func) + { + if (++(wq->runs_since_clear) < wq->spec.flood) + work_queue_schedule (wq, wq->spec.hold); + else + work_queue_schedule (wq, 0); /* queue flooded, go into overdrive */ + + return; + } + + /* queue has been cleared */ + wq->runs_since_clear = 0; + + if (wq->spec.completion_func) wq->spec.completion_func (wq); - return 0; + return; } diff --git a/lib/workqueue.h b/lib/workqueue.h --- a/lib/workqueue.h +++ b/lib/workqueue.h @@ -25,8 +25,9 @@ #define _QUAGGA_WORK_QUEUE_H /* Work queue default hold and cycle times - millisec */ -#define WORK_QUEUE_DEFAULT_HOLD 50 /* hold time for initial run of a queue */ -#define WORK_QUEUE_DEFAULT_DELAY 10 /* minimum delay between queue runs */ +#define WORK_QUEUE_DEFAULT_HOLD 50 /* hold-time between runs of a queue */ +#define WORK_QUEUE_DEFAULT_DELAY 10 /* minimum delay for queue runs */ +#define WORK_QUEUE_DEFAULT_FLOOD 40 /* flood factor, ~2s with prev values */ /* action value, for use by item processor and item error handlers */ typedef enum @@ -56,21 +57,31 @@ enum work_queue_flags struct work_queue { + /* Everything but the specification struct is private + * the following may be read + */ struct thread_master *master; /* thread 
master */ struct thread *thread; /* thread, if one is active */ char *name; /* work queue name */ - enum work_queue_flags flags; /* flags */ - /* specification for this work queue */ + /* Specification for this work queue. + * Public, must be set before use by caller. May be modified at will. + */ struct { - /* work function to process items with */ - wq_item_status (*workfunc) (void *); + /* optional opaque user data, global to the queue. */ + void *data; + + /* work function to process items with: + * First argument is the workqueue queue. + * Second argument is the item data + */ + wq_item_status (*workfunc) (struct work_queue *, void *); /* error handling function, optional */ void (*errorfunc) (struct work_queue *, struct work_queue_item *); /* callback to delete user specific item data */ - void (*del_item_data) (void *); + void (*del_item_data) (struct work_queue *, void *); /* completion callback, called when queue is emptied, optional */ void (*completion_func) (struct work_queue *); @@ -80,17 +91,36 @@ struct work_queue unsigned int hold; /* hold time for first run, in ms */ unsigned int delay; /* min delay between queue runs, in ms */ + + unsigned int flood; /* number of queue runs after which we consider + * queue to be flooded, where the runs are + * consecutive and each has used its full slot, + * and the queue has still not been cleared. If + * the queue is flooded, then we try harder to + * clear it by ignoring the hold and delay + * times. No point sparing CPU resources just + * to use ever more memory resources. 
+ */ } spec; /* remaining fields should be opaque to users */ struct list *items; /* queue item list */ unsigned long runs; /* runs count */ + unsigned int runs_since_clear; /* number of runs since queue was + * last cleared + */ struct { unsigned int best; unsigned int granularity; unsigned long total; } cycles; /* cycle counts */ + + /* private state */ + enum work_queue_flags flags; /* user set flag */ + char status; /* internal status */ +#define WQ_STATE_FLOODED (1 << 0) + }; /* User API */ diff --git a/zebra/ChangeLog b/zebra/ChangeLog --- a/zebra/ChangeLog +++ b/zebra/ChangeLog @@ -1,3 +1,12 @@ +2005-11-1 Paul Jakma + + * zebra_rib.c: (rib_process) convert to new workqueue specs and + shut up gcc, which complains about cast from void via + function parameters, for some dumb reason. Do the cast + inside the function instead. + (rib_queue_qnode_del) ditto. + (rib_queue_init) no need for the casts anymore. + 2005-11-11 Paul Jakma * kernel_socket.c: (ifm_read) arithmetic on void pointer diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -881,8 +881,9 @@ rib_uninstall (struct route_node *rn, st /* Core function for processing routing information base. 
*/ static wq_item_status -rib_process (struct zebra_queue_node_t *qnode) +rib_process (struct work_queue *wq, void *data) { + struct zebra_queue_node_t *qnode = data; struct rib *rib; struct rib *next; struct rib *fib = NULL; @@ -1052,8 +1053,9 @@ rib_queue_add (struct zebra_t *zebra, st /* free zebra_queue_node_t */ static void -rib_queue_qnode_del (struct zebra_queue_node_t *qnode) +rib_queue_qnode_del (struct work_queue *wq, void *data) { + struct zebra_queue_node_t *qnode = data; route_unlock_node (qnode->node); if (qnode->del) @@ -1076,13 +1078,11 @@ rib_queue_init (struct zebra_t *zebra) } /* fill in the work queue spec */ - zebra->ribq->spec.workfunc = (wq_item_status (*) (void *))&rib_process; + zebra->ribq->spec.workfunc = &rib_process; zebra->ribq->spec.errorfunc = NULL; - zebra->ribq->spec.del_item_data = (void (*) (void *)) &rib_queue_qnode_del; + zebra->ribq->spec.del_item_data = &rib_queue_qnode_del; /* XXX: TODO: These should be runtime configurable via vty */ zebra->ribq->spec.max_retries = 3; - zebra->ribq->spec.hold = 500; - zebra->ribq->spec.delay = 10; return; }