1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017, Joyent, Inc.
24 */
25
26 /*
27 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
28 */
29
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/callb.h>
33 #include <sys/conf.h>
34 #include <sys/cmn_err.h>
35 #include <sys/disp.h>
36 #include <sys/list.h>
37 #include <sys/ksynch.h>
38 #include <sys/kmem.h>
39 #include <sys/stream.h>
40 #include <sys/modctl.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/atomic.h>
44 #include <sys/stat.h>
45 #include <sys/byteorder.h>
46 #include <sys/strsun.h>
47 #include <sys/isa_defs.h>
48 #include <sys/sdt.h>
49
50 #include <sys/aggr.h>
51 #include <sys/aggr_impl.h>
52
/* All-zero Ethernet address constant. */
static struct ether_addr etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 *
 * Used as the destination address of the frames built by fill_lacp_ether()
 * and as the multicast address that aggr_lacp_mcast_on() and
 * aggr_lacp_mcast_off() hand to aggr_port_multicst().
 */
static struct ether_addr slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
63
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a fully parenthesized printf argument list, e.g.
 * AGGR_LACP_DBG(("port %d\n", id)); and prints it only when aggr_lacp_debug
 * is non-zero. Both variants expand to a single do/while(0) statement so
 * the macro is safe in an unbraced if/else and always needs the trailing
 * semicolon.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
/* Non-DEBUG builds: the arguments are discarded and never evaluated. */
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
71
/*
 * Nanoseconds in one second. lacp_xmit_sm() compares gethrtime() deltas
 * against this value.
 */
#define NSECS_PER_SEC 1000000000ll

/*
 * used by lacp_misconfig_walker()
 *
 * NOTE(review): the walker is not visible here; presumably cs_portp is the
 * port being checked and cs_found reports whether a conflict was seen --
 * confirm against lacp_misconfig_walker().
 */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t *cs_portp;
	boolean_t cs_found;
} lacp_misconfig_check_state_t;
79
/*
 * Printable state names for the receive, periodic and mux state machines.
 * lacp_periodic_str[] and lacp_mux_str[] are indexed by state value in the
 * AGGR_LACP_DBG() output of lacp_periodic_sm() and lacp_mux_sm().
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default actor priorities. aggr_lacp_init_port() copies lacp_port_priority
 * into each port's ActorPortPriority; aggr_lacp_init_grp() copies
 * lacp_system_priority into each group's ActorSystemPriority.
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
86
/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 */
typedef struct lacp_sel_ports {
	datalink_id_t sp_grp_linkid;
	datalink_id_t sp_linkid;
	/* Note: sp_partner_system must be 2-byte aligned */
	struct ether_addr sp_partner_system;
	uint32_t sp_partner_key;
	/* next entry in the list */
	struct lacp_sel_ports *sp_next;
} lacp_sel_ports_t;

/*
 * Head of the list described above; NULL while the list is empty.
 * NOTE(review): presumably protected by lacp_sel_lock -- confirm against
 * lacp_sel_ports_add()/lacp_sel_ports_del().
 */
static lacp_sel_ports_t *sel_ports = NULL;
/* Initialized by aggr_lacp_init() and destroyed by aggr_lacp_fini(). */
static kmutex_t lacp_sel_lock;
102
/* Forward declarations of file-local routines. */
static void periodic_timer_pop(void *);
static void periodic_timer_pop_handler(aggr_port_t *);
static void lacp_xmit_sm(aggr_port_t *);
static void lacp_periodic_sm(aggr_port_t *);
static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
static void lacp_on(aggr_port_t *);
static void lacp_off(aggr_port_t *);
static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
static void lacp_receive_sm(aggr_port_t *, lacp_t *);
static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
static void start_wait_while_timer(aggr_port_t *);
static void stop_wait_while_timer(aggr_port_t *);
static void lacp_reset_port(aggr_port_t *);
static void stop_current_while_timer(aggr_port_t *);
static void current_while_timer_pop(void *);
static void current_while_timer_pop_handler(aggr_port_t *);
static void update_default_selected(aggr_port_t *);
static boolean_t update_selected(aggr_port_t *, lacp_t *);
static boolean_t lacp_sel_ports_add(aggr_port_t *);
static void lacp_sel_ports_del(aggr_port_t *);
static void wait_while_timer_pop(void *);
static void wait_while_timer_pop_handler(aggr_port_t *);
126
127 void
aggr_lacp_init(void)128 aggr_lacp_init(void)
129 {
130 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
131 }
132
133 void
aggr_lacp_fini(void)134 aggr_lacp_fini(void)
135 {
136 mutex_destroy(&lacp_sel_lock);
137 }
138
139 /*
140 * The following functions are used for handling LACP timers.
141 *
142 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
143 * handler routine, otherwise it may cause deadlock with the untimeout() call
144 * which is usually called with the mac perimeter held. Instead, a
145 * lacp_timer_lock mutex is introduced, which protects a bitwise flag
146 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
147 * routines and is checked by a dedicated thread, that executes the real
148 * timeout operation.
149 */
150 static void
aggr_port_timer_thread(void * arg)151 aggr_port_timer_thread(void *arg)
152 {
153 aggr_port_t *port = arg;
154 aggr_lacp_port_t *pl = &port->lp_lacp;
155 aggr_grp_t *grp = port->lp_grp;
156 uint32_t lacp_timer_bits;
157 mac_perim_handle_t mph;
158 callb_cpr_t cprinfo;
159
160 CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
161 "aggr_port_timer_thread");
162
163 mutex_enter(&pl->lacp_timer_lock);
164
165 for (;;) {
166
167 if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
168 CALLB_CPR_SAFE_BEGIN(&cprinfo);
169 cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
170 CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
171 continue;
172 }
173 pl->lacp_timer_bits = 0;
174
175 if (lacp_timer_bits & LACP_THREAD_EXIT)
176 break;
177
178 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
179 pl->periodic_timer.id = 0;
180 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
181 pl->wait_while_timer.id = 0;
182 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
183 pl->current_while_timer.id = 0;
184
185 mutex_exit(&pl->lacp_timer_lock);
186
187 mac_perim_enter_by_mh(grp->lg_mh, &mph);
188 if (port->lp_closing) {
189 mac_perim_exit(mph);
190 mutex_enter(&pl->lacp_timer_lock);
191 break;
192 }
193
194 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
195 periodic_timer_pop_handler(port);
196 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
197 wait_while_timer_pop_handler(port);
198 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
199 current_while_timer_pop_handler(port);
200 mac_perim_exit(mph);
201
202 mutex_enter(&pl->lacp_timer_lock);
203 if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
204 break;
205 }
206
207 pl->lacp_timer_bits = 0;
208 pl->lacp_timer_thread = NULL;
209 cv_broadcast(&pl->lacp_timer_cv);
210
211 /* CALLB_CPR_EXIT drops the lock */
212 CALLB_CPR_EXIT(&cprinfo);
213
214 /*
215 * Release the reference of the grp so aggr_grp_delete() can call
216 * mac_unregister() safely.
217 */
218 aggr_grp_port_rele(port);
219 thread_exit();
220 }
221
222 /*
223 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
224 * could not be performed due to a memory allocation error, B_TRUE otherwise.
225 */
226 static boolean_t
lacp_port_select(aggr_port_t * portp)227 lacp_port_select(aggr_port_t *portp)
228 {
229 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
230
231 if (!lacp_sel_ports_add(portp))
232 return (B_FALSE);
233 portp->lp_lacp.sm.selected = AGGR_SELECTED;
234 return (B_TRUE);
235 }
236
237 /*
238 * Set the port LACP state to UNSELECTED.
239 */
240 static void
lacp_port_unselect(aggr_port_t * portp)241 lacp_port_unselect(aggr_port_t *portp)
242 {
243 aggr_grp_t *grp = portp->lp_grp;
244
245 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
246
247 lacp_sel_ports_del(portp);
248 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
249 }
250
251 /*
252 * Initialize group specific LACP state and parameters.
253 */
254 void
aggr_lacp_init_grp(aggr_grp_t * aggrp)255 aggr_lacp_init_grp(aggr_grp_t *aggrp)
256 {
257 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
258 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
259 aggrp->aggr.CollectorMaxDelay = 10;
260 aggrp->lg_lacp_mode = AGGR_LACP_OFF;
261 aggrp->aggr.ready = B_FALSE;
262 }
263
264 /*
265 * Complete LACP info initialization at port creation time.
266 */
267 void
aggr_lacp_init_port(aggr_port_t * portp)268 aggr_lacp_init_port(aggr_port_t *portp)
269 {
270 aggr_grp_t *aggrp = portp->lp_grp;
271 aggr_lacp_port_t *pl = &portp->lp_lacp;
272
273 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
274 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
275
276 /* actor port # */
277 pl->ActorPortNumber = portp->lp_portid;
278 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
279 "ActorPortNumber = 0x%x\n", portp->lp_linkid,
280 pl->ActorPortNumber));
281
282 pl->ActorPortPriority = (uint16_t)lacp_port_priority;
283 pl->ActorPortAggrId = 0; /* aggregator id - not used */
284 pl->NTT = B_FALSE; /* need to transmit */
285
286 pl->ActorAdminPortKey = aggrp->lg_key;
287 pl->ActorOperPortKey = pl->ActorAdminPortKey;
288 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
289 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
290 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
291
292 /* Actor admin. port state */
293 pl->ActorAdminPortState.bit.activity = B_FALSE;
294 pl->ActorAdminPortState.bit.timeout = B_TRUE;
295 pl->ActorAdminPortState.bit.aggregation = B_TRUE;
296 pl->ActorAdminPortState.bit.sync = B_FALSE;
297 pl->ActorAdminPortState.bit.collecting = B_FALSE;
298 pl->ActorAdminPortState.bit.distributing = B_FALSE;
299 pl->ActorAdminPortState.bit.defaulted = B_FALSE;
300 pl->ActorAdminPortState.bit.expired = B_FALSE;
301 pl->ActorOperPortState = pl->ActorAdminPortState;
302
303 /*
304 * Partner Administrative Information
305 * (All initialized to zero except for the following)
306 * Fast Timeouts.
307 */
308 pl->PartnerAdminPortState.bit.timeout =
309 pl->PartnerOperPortState.bit.timeout = B_TRUE;
310
311 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
312
313 /*
314 * State machine information.
315 */
316 pl->sm.lacp_on = B_FALSE; /* LACP Off default */
317 pl->sm.begin = B_TRUE; /* Prevents transmissions */
318 pl->sm.lacp_enabled = B_FALSE;
319 pl->sm.port_enabled = B_FALSE; /* Link Down */
320 pl->sm.actor_churn = B_FALSE;
321 pl->sm.partner_churn = B_FALSE;
322 pl->sm.ready_n = B_FALSE;
323 pl->sm.port_moved = B_FALSE;
324
325 lacp_port_unselect(portp);
326
327 pl->sm.periodic_state = LACP_NO_PERIODIC;
328 pl->sm.receive_state = LACP_INITIALIZE;
329 pl->sm.mux_state = LACP_DETACHED;
330 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
331
332 /*
333 * Timer information.
334 */
335 pl->current_while_timer.id = 0;
336 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
337
338 pl->periodic_timer.id = 0;
339 pl->periodic_timer.val = FAST_PERIODIC_TIME;
340
341 pl->wait_while_timer.id = 0;
342 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
343
344 pl->lacp_timer_bits = 0;
345
346 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
347 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
348
349 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
350 portp, 0, &p0, TS_RUN, minclsyspri);
351
352 /*
353 * Hold a reference of the grp and the port and this reference will
354 * be release when the thread exits.
355 *
356 * The reference on the port is used for aggr_port_delete() to
357 * continue without waiting for the thread to exit; the reference
358 * on the grp is used for aggr_grp_delete() to wait for the thread
359 * to exit before calling mac_unregister().
360 */
361 aggr_grp_port_hold(portp);
362 }
363
364 /*
365 * Port initialization when we need to
366 * turn LACP on/off, etc. Not everything is
367 * reset like in the above routine.
368 * Do NOT modify things like link status.
369 */
370 static void
lacp_reset_port(aggr_port_t * portp)371 lacp_reset_port(aggr_port_t *portp)
372 {
373 aggr_lacp_port_t *pl = &portp->lp_lacp;
374
375 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
376
377 pl->NTT = B_FALSE; /* need to transmit */
378
379 /* reset operational port state */
380 pl->ActorOperPortState.bit.timeout =
381 pl->ActorAdminPortState.bit.timeout;
382
383 pl->ActorOperPortState.bit.sync = B_FALSE;
384 pl->ActorOperPortState.bit.collecting = B_FALSE;
385 pl->ActorOperPortState.bit.distributing = B_FALSE;
386 pl->ActorOperPortState.bit.defaulted = B_TRUE;
387 pl->ActorOperPortState.bit.expired = B_FALSE;
388
389 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */
390 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
391
392 /*
393 * State machine information.
394 */
395 pl->sm.begin = B_TRUE; /* Prevents transmissions */
396 pl->sm.actor_churn = B_FALSE;
397 pl->sm.partner_churn = B_FALSE;
398 pl->sm.ready_n = B_FALSE;
399
400 lacp_port_unselect(portp);
401
402 pl->sm.periodic_state = LACP_NO_PERIODIC;
403 pl->sm.receive_state = LACP_INITIALIZE;
404 pl->sm.mux_state = LACP_DETACHED;
405 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
406
407 /*
408 * Timer information.
409 */
410 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
411 pl->periodic_timer.val = FAST_PERIODIC_TIME;
412 }
413
414 static void
aggr_lacp_mcast_on(aggr_port_t * port)415 aggr_lacp_mcast_on(aggr_port_t *port)
416 {
417 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
418 ASSERT(MAC_PERIM_HELD(port->lp_mh));
419
420 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
421 return;
422
423 (void) aggr_port_multicst(port, B_TRUE,
424 (uchar_t *)&slow_multicast_addr);
425 }
426
427 static void
aggr_lacp_mcast_off(aggr_port_t * port)428 aggr_lacp_mcast_off(aggr_port_t *port)
429 {
430 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
431 ASSERT(MAC_PERIM_HELD(port->lp_mh));
432
433 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
434 return;
435
436 (void) aggr_port_multicst(port, B_FALSE,
437 (uchar_t *)&slow_multicast_addr);
438 }
439
440 static void
start_periodic_timer(aggr_port_t * portp)441 start_periodic_timer(aggr_port_t *portp)
442 {
443 aggr_lacp_port_t *pl = &portp->lp_lacp;
444
445 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
446
447 mutex_enter(&pl->lacp_timer_lock);
448 if (pl->periodic_timer.id == 0) {
449 pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
450 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
451 }
452 mutex_exit(&pl->lacp_timer_lock);
453 }
454
455 static void
stop_periodic_timer(aggr_port_t * portp)456 stop_periodic_timer(aggr_port_t *portp)
457 {
458 aggr_lacp_port_t *pl = &portp->lp_lacp;
459 timeout_id_t id;
460
461 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
462
463 mutex_enter(&pl->lacp_timer_lock);
464 if ((id = pl->periodic_timer.id) != 0) {
465 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
466 pl->periodic_timer.id = 0;
467 }
468 mutex_exit(&pl->lacp_timer_lock);
469
470 if (id != 0)
471 (void) untimeout(id);
472 }
473
474 /*
475 * When the timer pops, we arrive here to
476 * clear out LACPDU count as well as transmit an
477 * LACPDU. We then set the periodic state and let
478 * the periodic state machine restart the timer.
479 */
480 static void
periodic_timer_pop(void * data)481 periodic_timer_pop(void *data)
482 {
483 aggr_port_t *portp = data;
484 aggr_lacp_port_t *pl = &portp->lp_lacp;
485
486 mutex_enter(&pl->lacp_timer_lock);
487 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
488 cv_broadcast(&pl->lacp_timer_cv);
489 mutex_exit(&pl->lacp_timer_lock);
490 }
491
492 /*
493 * When the timer pops, we arrive here to
494 * clear out LACPDU count as well as transmit an
495 * LACPDU. We then set the periodic state and let
496 * the periodic state machine restart the timer.
497 */
498 static void
periodic_timer_pop_handler(aggr_port_t * portp)499 periodic_timer_pop_handler(aggr_port_t *portp)
500 {
501 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
502
503 portp->lp_lacp_stats.LACPDUsTx = 0;
504
505 /* current timestamp */
506 portp->lp_lacp.time = gethrtime();
507 portp->lp_lacp.NTT = B_TRUE;
508 lacp_xmit_sm(portp);
509
510 /*
511 * Set Periodic State machine state based on the
512 * value of the Partner Operation Port State timeout
513 * bit.
514 */
515 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
516 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
517 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
518 } else {
519 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
520 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
521 }
522
523 lacp_periodic_sm(portp);
524 }
525
526 /*
527 * Invoked from:
528 * - startup upon aggregation
529 * - when the periodic timer pops
530 * - when the periodic timer value is changed
531 * - when the port is attached or detached
532 * - when LACP mode is changed.
533 */
534 static void
lacp_periodic_sm(aggr_port_t * portp)535 lacp_periodic_sm(aggr_port_t *portp)
536 {
537 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
538 aggr_lacp_port_t *pl = &portp->lp_lacp;
539
540 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
541
542 /* LACP_OFF state not in specification so check here. */
543 if (!pl->sm.lacp_on) {
544 /* Stop timer whether it is running or not */
545 stop_periodic_timer(portp);
546 pl->sm.periodic_state = LACP_NO_PERIODIC;
547 pl->NTT = B_FALSE;
548 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
549 "%s--->%s\n", portp->lp_linkid,
550 lacp_periodic_str[oldstate],
551 lacp_periodic_str[pl->sm.periodic_state]));
552 return;
553 }
554
555 if (pl->sm.begin || !pl->sm.lacp_enabled ||
556 !pl->sm.port_enabled ||
557 !pl->ActorOperPortState.bit.activity &&
558 !pl->PartnerOperPortState.bit.activity) {
559
560 /* Stop timer whether it is running or not */
561 stop_periodic_timer(portp);
562 pl->sm.periodic_state = LACP_NO_PERIODIC;
563 pl->NTT = B_FALSE;
564 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
565 portp->lp_linkid, lacp_periodic_str[oldstate],
566 lacp_periodic_str[pl->sm.periodic_state]));
567 return;
568 }
569
570 /*
571 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
572 * has been received. Then after we timeout, then it is
573 * possible to go to SLOW_PERIODIC_TIME.
574 */
575 if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
576 pl->periodic_timer.val = FAST_PERIODIC_TIME;
577 pl->sm.periodic_state = LACP_FAST_PERIODIC;
578 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
579 pl->PartnerOperPortState.bit.timeout) {
580 /*
581 * If we receive a bit indicating we are going to
582 * fast periodic from slow periodic, stop the timer
583 * and let the periodic_timer_pop routine deal
584 * with reseting the periodic state and transmitting
585 * a LACPDU.
586 */
587 stop_periodic_timer(portp);
588 periodic_timer_pop_handler(portp);
589 }
590
591 /* Rearm timer with value provided by partner */
592 start_periodic_timer(portp);
593 }
594
595 /*
596 * This routine transmits an LACPDU if lacp_enabled
597 * is TRUE and if NTT is set.
598 */
599 static void
lacp_xmit_sm(aggr_port_t * portp)600 lacp_xmit_sm(aggr_port_t *portp)
601 {
602 aggr_lacp_port_t *pl = &portp->lp_lacp;
603 size_t len;
604 mblk_t *mp;
605 hrtime_t now, elapsed;
606
607 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
608
609 /* LACP_OFF state not in specification so check here. */
610 if (!pl->sm.lacp_on || !pl->NTT)
611 return;
612
613 /*
614 * Do nothing if LACP has been turned off or if the
615 * periodic state machine is not enabled.
616 */
617 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
618 !pl->sm.lacp_enabled || pl->sm.begin) {
619 pl->NTT = B_FALSE;
620 return;
621 }
622
623 /*
624 * If we have sent 5 Slow packets in the last second, avoid
625 * sending any more here. No more than three LACPDUs may be transmitted
626 * in any Fast_Periodic_Time interval.
627 */
628 if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
629 /*
630 * Grab the current time value and see if
631 * more than 1 second has passed. If so,
632 * reset the timestamp and clear the count.
633 */
634 now = gethrtime();
635 elapsed = now - pl->time;
636 if (elapsed > NSECS_PER_SEC) {
637 portp->lp_lacp_stats.LACPDUsTx = 0;
638 pl->time = now;
639 } else {
640 return;
641 }
642 }
643
644 len = sizeof (lacp_t) + sizeof (struct ether_header);
645 mp = allocb(len, BPRI_MED);
646 if (mp == NULL)
647 return;
648
649 mp->b_wptr = mp->b_rptr + len;
650 bzero(mp->b_rptr, len);
651
652 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
653 fill_lacp_pdu(portp,
654 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
655
656 /* Send the packet over the first TX ring */
657 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
658 if (mp != NULL)
659 freemsg(mp);
660
661 pl->NTT = B_FALSE;
662 portp->lp_lacp_stats.LACPDUsTx++;
663 }
664
665 /*
666 * Initialize the ethernet header of a LACP packet sent from the specified
667 * port.
668 */
669 static void
fill_lacp_ether(aggr_port_t * port,struct ether_header * ether)670 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
671 {
672 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
673 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
674 ETHERADDRL);
675 ether->ether_type = htons(ETHERTYPE_SLOW);
676 }
677
678 static void
fill_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)679 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
680 {
681 aggr_lacp_port_t *pl = &portp->lp_lacp;
682 aggr_grp_t *aggrp = portp->lp_grp;
683 mac_perim_handle_t pmph;
684
685 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
686 mac_perim_enter_by_mh(portp->lp_mh, &pmph);
687
688 lacp->subtype = LACP_SUBTYPE;
689 lacp->version = LACP_VERSION;
690
691 /*
692 * Actor Information
693 */
694 lacp->actor_info.tlv_type = ACTOR_TLV;
695 lacp->actor_info.information_len = sizeof (link_info_t);
696 lacp->actor_info.system_priority =
697 htons(aggrp->aggr.ActorSystemPriority);
698 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
699 ETHERADDRL);
700 lacp->actor_info.key = htons(pl->ActorOperPortKey);
701 lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
702 lacp->actor_info.port = htons(pl->ActorPortNumber);
703 lacp->actor_info.state.state = pl->ActorOperPortState.state;
704
705 /*
706 * Partner Information
707 */
708 lacp->partner_info.tlv_type = PARTNER_TLV;
709 lacp->partner_info.information_len = sizeof (link_info_t);
710 lacp->partner_info.system_priority =
711 htons(pl->PartnerOperSysPriority);
712 lacp->partner_info.system_id = pl->PartnerOperSystem;
713 lacp->partner_info.key = htons(pl->PartnerOperKey);
714 lacp->partner_info.port_priority =
715 htons(pl->PartnerOperPortPriority);
716 lacp->partner_info.port = htons(pl->PartnerOperPortNum);
717 lacp->partner_info.state.state = pl->PartnerOperPortState.state;
718
719 /* Collector Information */
720 lacp->tlv_collector = COLLECTOR_TLV;
721 lacp->collector_len = 0x10;
722 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
723
724 /* Termination Information */
725 lacp->tlv_terminator = TERMINATOR_TLV;
726 lacp->terminator_len = 0x0;
727
728 mac_perim_exit(pmph);
729 }
730
731 /*
732 * lacp_mux_sm - LACP mux state machine
733 * This state machine is invoked from:
734 * - startup upon aggregation
735 * - from the Selection logic
736 * - when the wait_while_timer pops
737 * - when the aggregation MAC address is changed
738 * - when receiving DL_NOTE_LINK_UP/DOWN
739 * - when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
740 * - when LACP mode is changed.
741 * - when a DL_NOTE_SPEED is received
742 */
743 static void
lacp_mux_sm(aggr_port_t * portp)744 lacp_mux_sm(aggr_port_t *portp)
745 {
746 aggr_grp_t *aggrp = portp->lp_grp;
747 boolean_t NTT_updated = B_FALSE;
748 aggr_lacp_port_t *pl = &portp->lp_lacp;
749 lacp_mux_state_t oldstate = pl->sm.mux_state;
750
751 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
752
753 /* LACP_OFF state not in specification so check here. */
754 if (!pl->sm.lacp_on) {
755 pl->sm.mux_state = LACP_DETACHED;
756 pl->ActorOperPortState.bit.sync = B_FALSE;
757
758 if (pl->ActorOperPortState.bit.collecting ||
759 pl->ActorOperPortState.bit.distributing) {
760 AGGR_LACP_DBG(("trunk link: (%d): "
761 "Collector_Distributor Disabled.\n",
762 portp->lp_linkid));
763 }
764
765 pl->ActorOperPortState.bit.collecting =
766 pl->ActorOperPortState.bit.distributing = B_FALSE;
767 return;
768 }
769
770 if (pl->sm.begin || !pl->sm.lacp_enabled)
771 pl->sm.mux_state = LACP_DETACHED;
772
773 again:
774 /* determine next state, or return if state unchanged */
775 switch (pl->sm.mux_state) {
776 case LACP_DETACHED:
777 if (pl->sm.begin) {
778 break;
779 }
780
781 if ((pl->sm.selected == AGGR_SELECTED) ||
782 (pl->sm.selected == AGGR_STANDBY)) {
783 pl->sm.mux_state = LACP_WAITING;
784 break;
785 }
786 return;
787
788 case LACP_WAITING:
789 if (pl->sm.selected == AGGR_UNSELECTED) {
790 pl->sm.mux_state = LACP_DETACHED;
791 break;
792 }
793
794 if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
795 pl->sm.mux_state = LACP_ATTACHED;
796 break;
797 }
798 return;
799
800 case LACP_ATTACHED:
801 if ((pl->sm.selected == AGGR_UNSELECTED) ||
802 (pl->sm.selected == AGGR_STANDBY)) {
803 pl->sm.mux_state = LACP_DETACHED;
804 break;
805 }
806
807 if ((pl->sm.selected == AGGR_SELECTED) &&
808 pl->PartnerOperPortState.bit.sync) {
809 pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
810 break;
811 }
812 return;
813
814 case LACP_COLLECTING_DISTRIBUTING:
815 if ((pl->sm.selected == AGGR_UNSELECTED) ||
816 (pl->sm.selected == AGGR_STANDBY) ||
817 !pl->PartnerOperPortState.bit.sync) {
818 pl->sm.mux_state = LACP_ATTACHED;
819 break;
820 }
821 return;
822 }
823
824 AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
825 portp->lp_linkid, lacp_mux_str[oldstate],
826 lacp_mux_str[pl->sm.mux_state]));
827
828 /* perform actions on entering a new state */
829 switch (pl->sm.mux_state) {
830 case LACP_DETACHED:
831 if (pl->ActorOperPortState.bit.collecting ||
832 pl->ActorOperPortState.bit.distributing) {
833 AGGR_LACP_DBG(("trunk link: (%d): "
834 "Collector_Distributor Disabled.\n",
835 portp->lp_linkid));
836 }
837
838 pl->ActorOperPortState.bit.sync =
839 pl->ActorOperPortState.bit.collecting = B_FALSE;
840
841 /* Turn OFF Collector_Distributor */
842 aggr_set_coll_dist(portp, B_FALSE);
843
844 pl->ActorOperPortState.bit.distributing = B_FALSE;
845 NTT_updated = B_TRUE;
846 break;
847
848 case LACP_WAITING:
849 start_wait_while_timer(portp);
850 break;
851
852 case LACP_ATTACHED:
853 if (pl->ActorOperPortState.bit.collecting ||
854 pl->ActorOperPortState.bit.distributing) {
855 AGGR_LACP_DBG(("trunk link: (%d): "
856 "Collector_Distributor Disabled.\n",
857 portp->lp_linkid));
858 }
859
860 pl->ActorOperPortState.bit.sync = B_TRUE;
861 pl->ActorOperPortState.bit.collecting = B_FALSE;
862
863 /* Turn OFF Collector_Distributor */
864 aggr_set_coll_dist(portp, B_FALSE);
865
866 pl->ActorOperPortState.bit.distributing = B_FALSE;
867 NTT_updated = B_TRUE;
868 if (pl->PartnerOperPortState.bit.sync) {
869 /*
870 * We had already received an updated sync from
871 * the partner. Attempt to transition to
872 * collecting/distributing now.
873 */
874 goto again;
875 }
876 break;
877
878 case LACP_COLLECTING_DISTRIBUTING:
879 if (!pl->ActorOperPortState.bit.collecting &&
880 !pl->ActorOperPortState.bit.distributing) {
881 AGGR_LACP_DBG(("trunk link: (%d): "
882 "Collector_Distributor Enabled.\n",
883 portp->lp_linkid));
884 }
885 pl->ActorOperPortState.bit.distributing = B_TRUE;
886
887 /* Turn Collector_Distributor back ON */
888 aggr_set_coll_dist(portp, B_TRUE);
889
890 pl->ActorOperPortState.bit.collecting = B_TRUE;
891 NTT_updated = B_TRUE;
892 break;
893 }
894
895 /*
896 * If we updated the state of the NTT variable, then
897 * initiate a LACPDU transmission.
898 */
899 if (NTT_updated) {
900 pl->NTT = B_TRUE;
901 lacp_xmit_sm(portp);
902 }
903 } /* lacp_mux_sm */
904
905
906 static int
receive_marker_pdu(aggr_port_t * portp,mblk_t * mp)907 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
908 {
909 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr;
910
911 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
912
913 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
914 portp->lp_linkid));
915
916 /* LACP_OFF state not in specification so check here. */
917 if (!portp->lp_lacp.sm.lacp_on)
918 return (-1);
919
920 if (MBLKL(mp) < sizeof (marker_pdu_t))
921 return (-1);
922
923 if (markerp->version != MARKER_VERSION) {
924 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
925 "version = %d does not match s/w version %d\n",
926 portp->lp_linkid, markerp->version, MARKER_VERSION));
927 return (-1);
928 }
929
930 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
931 /* We do not yet send out MARKER info PDUs */
932 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
933 " MARKER TLV = %d - We don't send out info type!\n",
934 portp->lp_linkid, markerp->tlv_marker));
935 return (-1);
936 }
937
938 if (markerp->tlv_marker != MARKER_INFO_TLV) {
939 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
940 " MARKER TLV = %d \n", portp->lp_linkid,
941 markerp->tlv_marker));
942 return (-1);
943 }
944
945 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
946 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
947 " MARKER length = %d \n", portp->lp_linkid,
948 markerp->marker_len));
949 return (-1);
950 }
951
952 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
953 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
954 " MARKER Port %d not equal to Partner port %d\n",
955 portp->lp_linkid, markerp->requestor_port,
956 portp->lp_lacp.PartnerOperPortNum));
957 return (-1);
958 }
959
960 if (ether_cmp(&markerp->system_id,
961 &portp->lp_lacp.PartnerOperSystem) != 0) {
962 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
963 " MARKER MAC not equal to Partner MAC\n",
964 portp->lp_linkid));
965 return (-1);
966 }
967
968 /*
969 * Turn into Marker Response PDU
970 * and return mblk to sending system
971 */
972 markerp->tlv_marker = MARKER_RESPONSE_TLV;
973
974 /* reuse the space that was used by received ethernet header */
975 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
976 mp->b_rptr -= sizeof (struct ether_header);
977 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
978 return (0);
979 }
980
981 /*
982 * Update the LACP mode (off, active, or passive) of the specified group.
983 */
984 void
aggr_lacp_update_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode)985 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
986 {
987 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
988 aggr_port_t *port;
989
990 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
991 ASSERT(!grp->lg_closing);
992
993 if (mode == old_mode)
994 return;
995
996 grp->lg_lacp_mode = mode;
997
998 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
999 port->lp_lacp.ActorAdminPortState.bit.activity =
1000 port->lp_lacp.ActorOperPortState.bit.activity =
1001 (mode == AGGR_LACP_ACTIVE);
1002
1003 if (old_mode == AGGR_LACP_OFF) {
1004 /* OFF -> {PASSIVE,ACTIVE} */
1005 /* turn OFF Collector_Distributor */
1006 aggr_set_coll_dist(port, B_FALSE);
1007 lacp_on(port);
1008 } else if (mode == AGGR_LACP_OFF) {
1009 /* {PASSIVE,ACTIVE} -> OFF */
1010 lacp_off(port);
1011 /* Turn ON Collector_Distributor */
1012 aggr_set_coll_dist(port, B_TRUE);
1013 } else {
1014 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1015 port->lp_lacp.sm.begin = B_TRUE;
1016 lacp_mux_sm(port);
1017 lacp_periodic_sm(port);
1018
1019 /* kick off state machines */
1020 lacp_receive_sm(port, NULL);
1021 lacp_mux_sm(port);
1022 }
1023 }
1024 }
1025
1026
1027 /*
1028 * Update the LACP timer (short or long) of the specified group.
1029 */
1030 void
aggr_lacp_update_timer(aggr_grp_t * grp,aggr_lacp_timer_t timer)1031 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1032 {
1033 aggr_port_t *port;
1034
1035 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1036
1037 if (timer == grp->aggr.PeriodicTimer)
1038 return;
1039
1040 grp->aggr.PeriodicTimer = timer;
1041
1042 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1043 port->lp_lacp.ActorAdminPortState.bit.timeout =
1044 port->lp_lacp.ActorOperPortState.bit.timeout =
1045 (timer == AGGR_LACP_TIMER_SHORT);
1046 }
1047 }
1048
1049 void
aggr_port_lacp_set_mode(aggr_grp_t * grp,aggr_port_t * port)1050 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1051 {
1052 aggr_lacp_mode_t mode;
1053 aggr_lacp_timer_t timer;
1054
1055 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1056
1057 mode = grp->lg_lacp_mode;
1058 timer = grp->aggr.PeriodicTimer;
1059
1060 port->lp_lacp.ActorAdminPortState.bit.activity =
1061 port->lp_lacp.ActorOperPortState.bit.activity =
1062 (mode == AGGR_LACP_ACTIVE);
1063
1064 port->lp_lacp.ActorAdminPortState.bit.timeout =
1065 port->lp_lacp.ActorOperPortState.bit.timeout =
1066 (timer == AGGR_LACP_TIMER_SHORT);
1067
1068 if (mode == AGGR_LACP_OFF) {
1069 /* Turn ON Collector_Distributor */
1070 aggr_set_coll_dist(port, B_TRUE);
1071 } else { /* LACP_ACTIVE/PASSIVE */
1072 lacp_on(port);
1073 }
1074 }
1075
1076 /*
1077 * Sets the initial LACP mode (off, active, passive) and LACP timer
1078 * (short, long) of the specified group.
1079 */
1080 void
aggr_lacp_set_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode,aggr_lacp_timer_t timer)1081 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1082 aggr_lacp_timer_t timer)
1083 {
1084 aggr_port_t *port;
1085
1086 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1087
1088 grp->lg_lacp_mode = mode;
1089 grp->aggr.PeriodicTimer = timer;
1090
1091 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1092 aggr_port_lacp_set_mode(grp, port);
1093 }
1094
1095 /*
1096 * Verify that the Partner MAC and Key recorded by the specified
1097 * port are not found in other ports that are not part of our
1098 * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1099 * otherwise.
1100 */
1101 static boolean_t
lacp_misconfig_check(aggr_port_t * portp)1102 lacp_misconfig_check(aggr_port_t *portp)
1103 {
1104 aggr_grp_t *grp = portp->lp_grp;
1105 lacp_sel_ports_t *cport;
1106
1107 mutex_enter(&lacp_sel_lock);
1108
1109 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1110
1111 /* skip entries of the group of the port being checked */
1112 if (cport->sp_grp_linkid == grp->lg_linkid)
1113 continue;
1114
1115 if ((ether_cmp(&cport->sp_partner_system,
1116 &grp->aggr.PartnerSystem) == 0) &&
1117 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1118 char mac_str[ETHERADDRL*3];
1119 struct ether_addr *mac = &cport->sp_partner_system;
1120
1121 /*
1122 * The Partner port information is already in use
1123 * by ports in another aggregation so disable this
1124 * port.
1125 */
1126
1127 (void) snprintf(mac_str, sizeof (mac_str),
1128 "%x:%x:%x:%x:%x:%x",
1129 mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1130 mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1131 mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1132
1133 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1134
1135 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1136 "MAC %s and key %d in use on aggregation %d "
1137 "port %d\n", grp->lg_linkid, portp->lp_linkid,
1138 mac_str, portp->lp_lacp.PartnerOperKey,
1139 cport->sp_grp_linkid, cport->sp_linkid);
1140 break;
1141 }
1142 }
1143
1144 mutex_exit(&lacp_sel_lock);
1145 return (cport != NULL);
1146 }
1147
1148 /*
1149 * Remove the specified port from the list of selected ports.
1150 */
1151 static void
lacp_sel_ports_del(aggr_port_t * portp)1152 lacp_sel_ports_del(aggr_port_t *portp)
1153 {
1154 lacp_sel_ports_t *cport, **prev = NULL;
1155
1156 mutex_enter(&lacp_sel_lock);
1157
1158 prev = &sel_ports;
1159 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1160 cport = cport->sp_next) {
1161 if (portp->lp_linkid == cport->sp_linkid)
1162 break;
1163 }
1164
1165 if (cport == NULL) {
1166 mutex_exit(&lacp_sel_lock);
1167 return;
1168 }
1169
1170 *prev = cport->sp_next;
1171 kmem_free(cport, sizeof (*cport));
1172
1173 mutex_exit(&lacp_sel_lock);
1174 }
1175
1176 /*
1177 * Add the specified port to the list of selected ports. Returns B_FALSE
1178 * if the operation could not be performed due to an memory allocation
1179 * error.
1180 */
1181 static boolean_t
lacp_sel_ports_add(aggr_port_t * portp)1182 lacp_sel_ports_add(aggr_port_t *portp)
1183 {
1184 lacp_sel_ports_t *new_port;
1185 lacp_sel_ports_t *cport, **last;
1186
1187 mutex_enter(&lacp_sel_lock);
1188
1189 /* check if port is already in the list */
1190 last = &sel_ports;
1191 for (cport = sel_ports; cport != NULL;
1192 last = &cport->sp_next, cport = cport->sp_next) {
1193 if (portp->lp_linkid == cport->sp_linkid) {
1194 ASSERT(cport->sp_partner_key ==
1195 portp->lp_lacp.PartnerOperKey);
1196 ASSERT(ether_cmp(&cport->sp_partner_system,
1197 &portp->lp_lacp.PartnerOperSystem) == 0);
1198
1199 mutex_exit(&lacp_sel_lock);
1200 return (B_TRUE);
1201 }
1202 }
1203
1204 /* create and initialize new entry */
1205 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1206 if (new_port == NULL) {
1207 mutex_exit(&lacp_sel_lock);
1208 return (B_FALSE);
1209 }
1210
1211 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1212 bcopy(&portp->lp_lacp.PartnerOperSystem,
1213 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1214 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1215 new_port->sp_linkid = portp->lp_linkid;
1216
1217 *last = new_port;
1218
1219 mutex_exit(&lacp_sel_lock);
1220 return (B_TRUE);
1221 }
1222
1223 /*
1224 * lacp_selection_logic - LACP selection logic
1225 * Sets the selected variable on a per port basis
1226 * and sets Ready when all waiting ports are ready
1227 * to go online.
1228 *
1229 * parameters:
1230 * - portp - instance this applies to.
1231 *
1232 * invoked:
1233 * - when initialization is needed
1234 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1235 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1236 * - every time the wait_while_timer pops
1237 * - everytime we turn LACP on/off
1238 */
1239 static void
lacp_selection_logic(aggr_port_t * portp)1240 lacp_selection_logic(aggr_port_t *portp)
1241 {
1242 aggr_port_t *tpp;
1243 aggr_grp_t *aggrp = portp->lp_grp;
1244 int ports_waiting;
1245 boolean_t reset_mac = B_FALSE;
1246 aggr_lacp_port_t *pl = &portp->lp_lacp;
1247
1248 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1249
1250 /* LACP_OFF state not in specification so check here. */
1251 if (!pl->sm.lacp_on) {
1252 lacp_port_unselect(portp);
1253 aggrp->aggr.ready = B_FALSE;
1254 lacp_mux_sm(portp);
1255 return;
1256 }
1257
1258 if (pl->sm.begin || !pl->sm.lacp_enabled ||
1259 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1260
1261 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1262 "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1263 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1264 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1265 portp->lp_state));
1266
1267 lacp_port_unselect(portp);
1268 aggrp->aggr.ready = B_FALSE;
1269 lacp_mux_sm(portp);
1270 return;
1271 }
1272
1273 /*
1274 * If LACP is not enabled then selected is never set.
1275 */
1276 if (!pl->sm.lacp_enabled) {
1277 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1278 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1279
1280 lacp_port_unselect(portp);
1281 lacp_mux_sm(portp);
1282 return;
1283 }
1284
1285 /*
1286 * Check if the Partner MAC or Key are zero. If so, we have
1287 * not received any LACP info or it has expired and the
1288 * receive machine is in the LACP_DEFAULTED state.
1289 */
1290 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 ||
1291 (pl->PartnerOperKey == 0)) {
1292
1293 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1294 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1295 ðerzeroaddr) != 0 &&
1296 (tpp->lp_lacp.PartnerOperKey != 0))
1297 break;
1298 }
1299
1300 /*
1301 * If all ports have no key or aggregation address,
1302 * then clear the negotiated Partner MAC and key.
1303 */
1304 if (tpp == NULL) {
1305 /* Clear the aggregation Partner MAC and key */
1306 aggrp->aggr.PartnerSystem = etherzeroaddr;
1307 aggrp->aggr.PartnerOperAggrKey = 0;
1308 }
1309
1310 return;
1311 }
1312
1313 /*
1314 * Insure that at least one port in the aggregation
1315 * matches the Partner aggregation MAC and key. If not,
1316 * then clear the aggregation MAC and key. Later we will
1317 * set the Partner aggregation MAC and key to that of the
1318 * current port's Partner MAC and key.
1319 */
1320 if (ether_cmp(&pl->PartnerOperSystem,
1321 &aggrp->aggr.PartnerSystem) != 0 ||
1322 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1323
1324 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1325 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1326 &aggrp->aggr.PartnerSystem) == 0 &&
1327 (tpp->lp_lacp.PartnerOperKey ==
1328 aggrp->aggr.PartnerOperAggrKey)) {
1329 /* Set aggregation Partner MAC and key */
1330 aggrp->aggr.PartnerSystem =
1331 pl->PartnerOperSystem;
1332 aggrp->aggr.PartnerOperAggrKey =
1333 pl->PartnerOperKey;
1334 break;
1335 }
1336 }
1337
1338 if (tpp == NULL) {
1339 /* Clear the aggregation Partner MAC and key */
1340 aggrp->aggr.PartnerSystem = etherzeroaddr;
1341 aggrp->aggr.PartnerOperAggrKey = 0;
1342 reset_mac = B_TRUE;
1343 }
1344 }
1345
1346 /*
1347 * If our Actor MAC is found in the Partner MAC
1348 * on this port then we have a loopback misconfiguration.
1349 */
1350 if (ether_cmp(&pl->PartnerOperSystem,
1351 (struct ether_addr *)&aggrp->lg_addr) == 0) {
1352 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1353 portp->lp_linkid);
1354
1355 lacp_port_unselect(portp);
1356 lacp_mux_sm(portp);
1357 return;
1358 }
1359
1360 /*
1361 * If our Partner MAC and Key are found on any other
1362 * ports that are not in our aggregation, we have
1363 * a misconfiguration.
1364 */
1365 if (lacp_misconfig_check(portp)) {
1366 lacp_mux_sm(portp);
1367 return;
1368 }
1369
1370 /*
1371 * If the Aggregation Partner MAC and Key have not been
1372 * set, then this is either the first port or the aggregation
1373 * MAC and key have been reset. In either case we must set
1374 * the values of the Partner MAC and key.
1375 */
1376 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 &&
1377 (aggrp->aggr.PartnerOperAggrKey == 0)) {
1378 /* Set aggregation Partner MAC and key */
1379 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1380 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1381
1382 /*
1383 * If we reset Partner aggregation MAC, then restart
1384 * selection_logic on ports that match new MAC address.
1385 */
1386 if (reset_mac) {
1387 for (tpp = aggrp->lg_ports; tpp; tpp =
1388 tpp->lp_next) {
1389 if (tpp == portp)
1390 continue;
1391 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1392 &aggrp->aggr.PartnerSystem) == 0 &&
1393 (tpp->lp_lacp.PartnerOperKey ==
1394 aggrp->aggr.PartnerOperAggrKey))
1395 lacp_selection_logic(tpp);
1396 }
1397 }
1398 } else if (ether_cmp(&pl->PartnerOperSystem,
1399 &aggrp->aggr.PartnerSystem) != 0 ||
1400 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1401 /*
1402 * The Partner port information does not match
1403 * that of the other ports in the aggregation
1404 * so disable this port.
1405 */
1406 lacp_port_unselect(portp);
1407
1408 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1409 "or key (%d) incompatible with Aggregation Partner "
1410 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1411 aggrp->aggr.PartnerOperAggrKey);
1412
1413 lacp_mux_sm(portp);
1414 return;
1415 }
1416
1417 /* If we get to here, automatically set selected */
1418 if (pl->sm.selected != AGGR_SELECTED) {
1419 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1420 "selected %d-->%d\n", portp->lp_linkid,
1421 pl->sm.selected, AGGR_SELECTED));
1422 if (!lacp_port_select(portp))
1423 return;
1424 lacp_mux_sm(portp);
1425 }
1426
1427 /*
1428 * From this point onward we have selected the port
1429 * and are simply checking if the Ready flag should
1430 * be set.
1431 */
1432
1433 /*
1434 * If at least two ports are waiting to aggregate
1435 * and ready_n is set on all ports waiting to aggregate
1436 * then set READY for the aggregation.
1437 */
1438
1439 ports_waiting = 0;
1440
1441 if (!aggrp->aggr.ready) {
1442 /*
1443 * If all ports in the aggregation have received compatible
1444 * partner information and they match up correctly with the
1445 * switch, there is no need to wait for all the
1446 * wait_while_timers to pop.
1447 */
1448 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1449 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1450 tpp->lp_lacp.sm.begin) &&
1451 !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1452 /* Add up ports uninitialized or waiting */
1453 ports_waiting++;
1454 if (!tpp->lp_lacp.sm.ready_n) {
1455 DTRACE_PROBE1(port___not__ready,
1456 aggr_port_t *, tpp);
1457 return;
1458 }
1459 }
1460 }
1461 }
1462
1463 if (aggrp->aggr.ready) {
1464 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1465 "aggr.ready already set\n", portp->lp_linkid));
1466 lacp_mux_sm(portp);
1467 } else {
1468 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1469 portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1470 aggrp->aggr.ready = B_TRUE;
1471
1472 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1473 lacp_mux_sm(tpp);
1474 }
1475
1476 }
1477
1478 /*
1479 * wait_while_timer_pop - When the timer pops, we arrive here to
1480 * set ready_n and trigger the selection logic.
1481 */
1482 static void
wait_while_timer_pop(void * data)1483 wait_while_timer_pop(void *data)
1484 {
1485 aggr_port_t *portp = data;
1486 aggr_lacp_port_t *pl = &portp->lp_lacp;
1487
1488 mutex_enter(&pl->lacp_timer_lock);
1489 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1490 cv_broadcast(&pl->lacp_timer_cv);
1491 mutex_exit(&pl->lacp_timer_lock);
1492 }
1493
1494 /*
1495 * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1496 * set ready_n and trigger the selection logic.
1497 */
1498 static void
wait_while_timer_pop_handler(aggr_port_t * portp)1499 wait_while_timer_pop_handler(aggr_port_t *portp)
1500 {
1501 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1502
1503 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1504 portp->lp_linkid));
1505 portp->lp_lacp.sm.ready_n = B_TRUE;
1506
1507 lacp_selection_logic(portp);
1508 }
1509
1510 static void
start_wait_while_timer(aggr_port_t * portp)1511 start_wait_while_timer(aggr_port_t *portp)
1512 {
1513 aggr_lacp_port_t *pl = &portp->lp_lacp;
1514
1515 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1516
1517 mutex_enter(&pl->lacp_timer_lock);
1518 if (pl->wait_while_timer.id == 0) {
1519 pl->wait_while_timer.id =
1520 timeout(wait_while_timer_pop, portp,
1521 drv_usectohz(1000000 *
1522 portp->lp_lacp.wait_while_timer.val));
1523 }
1524 mutex_exit(&pl->lacp_timer_lock);
1525 }
1526
1527
1528 static void
stop_wait_while_timer(aggr_port_t * portp)1529 stop_wait_while_timer(aggr_port_t *portp)
1530 {
1531 aggr_lacp_port_t *pl = &portp->lp_lacp;
1532 timeout_id_t id;
1533
1534 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1535
1536 mutex_enter(&pl->lacp_timer_lock);
1537 if ((id = pl->wait_while_timer.id) != 0) {
1538 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1539 pl->wait_while_timer.id = 0;
1540 }
1541 mutex_exit(&pl->lacp_timer_lock);
1542
1543 if (id != 0)
1544 (void) untimeout(id);
1545 }
1546
1547 /*
1548 * Invoked when a port has been attached to a group.
1549 * Complete the processing that couldn't be finished from lacp_on()
1550 * because the port was not started. We know that the link is full
1551 * duplex and ON, otherwise it wouldn't be attached.
1552 */
1553 void
aggr_lacp_port_attached(aggr_port_t * portp)1554 aggr_lacp_port_attached(aggr_port_t *portp)
1555 {
1556 aggr_grp_t *grp = portp->lp_grp;
1557 aggr_lacp_port_t *pl = &portp->lp_lacp;
1558
1559 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1560 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1561 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1562
1563 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1564 portp->lp_linkid));
1565
1566 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */
1567
1568 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1569 return;
1570
1571 pl->sm.lacp_enabled = B_TRUE;
1572 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1573 pl->sm.begin = B_TRUE;
1574
1575 lacp_receive_sm(portp, NULL);
1576 lacp_mux_sm(portp);
1577
1578 /* Enable Multicast Slow Protocol address */
1579 aggr_lacp_mcast_on(portp);
1580
1581 /* periodic_sm is started up from the receive machine */
1582 lacp_selection_logic(portp);
1583 }
1584
1585 /*
1586 * Invoked when a port has been detached from a group. Turn off
1587 * LACP processing if it was enabled.
1588 */
1589 void
aggr_lacp_port_detached(aggr_port_t * portp)1590 aggr_lacp_port_detached(aggr_port_t *portp)
1591 {
1592 aggr_grp_t *grp = portp->lp_grp;
1593
1594 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1595 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1596
1597 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1598 portp->lp_linkid));
1599
1600 portp->lp_lacp.sm.port_enabled = B_FALSE;
1601
1602 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1603 return;
1604
1605 portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1606 lacp_selection_logic(portp);
1607 lacp_mux_sm(portp);
1608 lacp_periodic_sm(portp);
1609
1610 /*
1611 * Disable Slow Protocol Timers.
1612 */
1613 stop_periodic_timer(portp);
1614 stop_current_while_timer(portp);
1615 stop_wait_while_timer(portp);
1616
1617 /* Disable Multicast Slow Protocol address */
1618 aggr_lacp_mcast_off(portp);
1619 aggr_set_coll_dist(portp, B_FALSE);
1620 }
1621
1622 /*
1623 * Enable Slow Protocol LACP and Marker PDUs.
1624 */
1625 static void
lacp_on(aggr_port_t * portp)1626 lacp_on(aggr_port_t *portp)
1627 {
1628 aggr_lacp_port_t *pl = &portp->lp_lacp;
1629 mac_perim_handle_t mph;
1630
1631 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1632
1633 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1634
1635 /*
1636 * Reset the state machines and Partner operational
1637 * information. Careful to not reset things like
1638 * our link state.
1639 */
1640 lacp_reset_port(portp);
1641 pl->sm.lacp_on = B_TRUE;
1642
1643 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1644
1645 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1646 pl->sm.port_enabled = B_TRUE;
1647 pl->sm.lacp_enabled = B_TRUE;
1648 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1649 }
1650
1651 lacp_receive_sm(portp, NULL);
1652 lacp_mux_sm(portp);
1653
1654 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1655 /* Enable Multicast Slow Protocol address */
1656 aggr_lacp_mcast_on(portp);
1657
1658 /* periodic_sm is started up from the receive machine */
1659 lacp_selection_logic(portp);
1660 }
1661 done:
1662 mac_perim_exit(mph);
1663 } /* lacp_on */
1664
1665 /* Disable Slow Protocol LACP and Marker PDUs */
1666 static void
lacp_off(aggr_port_t * portp)1667 lacp_off(aggr_port_t *portp)
1668 {
1669 aggr_lacp_port_t *pl = &portp->lp_lacp;
1670 mac_perim_handle_t mph;
1671
1672 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1673 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1674
1675 pl->sm.lacp_on = B_FALSE;
1676
1677 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1678
1679 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1680 /*
1681 * Disable Slow Protocol Timers.
1682 */
1683 stop_periodic_timer(portp);
1684 stop_current_while_timer(portp);
1685 stop_wait_while_timer(portp);
1686
1687 /* Disable Multicast Slow Protocol address */
1688 aggr_lacp_mcast_off(portp);
1689
1690 pl->sm.port_enabled = B_FALSE;
1691 pl->sm.lacp_enabled = B_FALSE;
1692 pl->ActorOperPortState.bit.aggregation = B_FALSE;
1693 }
1694
1695 lacp_mux_sm(portp);
1696 lacp_periodic_sm(portp);
1697 lacp_selection_logic(portp);
1698
1699 /* Turn OFF Collector_Distributor */
1700 aggr_set_coll_dist(portp, B_FALSE);
1701
1702 lacp_reset_port(portp);
1703 mac_perim_exit(mph);
1704 }
1705
1706
1707 static boolean_t
valid_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)1708 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1709 {
1710 /*
1711 * 43.4.12 - "a Receive machine shall not validate
1712 * the Version Number, TLV_type, or Reserved fields in received
1713 * LACPDUs."
1714 * ... "a Receive machine may validate the Actor_Information_Length,
1715 * Partner_Information_Length, Collector_Information_Length,
1716 * or Terminator_Length fields."
1717 */
1718 if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1719 (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1720 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1721 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1722 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1723 " Terminator Length = %d \n", portp->lp_linkid,
1724 lacp->terminator_len));
1725 return (B_FALSE);
1726 }
1727
1728 return (B_TRUE);
1729 }
1730
1731
1732 static void
start_current_while_timer(aggr_port_t * portp,uint_t time)1733 start_current_while_timer(aggr_port_t *portp, uint_t time)
1734 {
1735 aggr_lacp_port_t *pl = &portp->lp_lacp;
1736
1737 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1738
1739 mutex_enter(&pl->lacp_timer_lock);
1740 if (pl->current_while_timer.id == 0) {
1741 if (time > 0)
1742 pl->current_while_timer.val = time;
1743 else if (pl->ActorOperPortState.bit.timeout)
1744 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1745 else
1746 pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1747
1748 pl->current_while_timer.id =
1749 timeout(current_while_timer_pop, portp,
1750 drv_usectohz((clock_t)1000000 *
1751 (clock_t)portp->lp_lacp.current_while_timer.val));
1752 }
1753 mutex_exit(&pl->lacp_timer_lock);
1754 }
1755
1756
1757 static void
stop_current_while_timer(aggr_port_t * portp)1758 stop_current_while_timer(aggr_port_t *portp)
1759 {
1760 aggr_lacp_port_t *pl = &portp->lp_lacp;
1761 timeout_id_t id;
1762
1763 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1764
1765 mutex_enter(&pl->lacp_timer_lock);
1766 if ((id = pl->current_while_timer.id) != 0) {
1767 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1768 pl->current_while_timer.id = 0;
1769 }
1770 mutex_exit(&pl->lacp_timer_lock);
1771
1772 if (id != 0)
1773 (void) untimeout(id);
1774 }
1775
1776 static void
current_while_timer_pop(void * data)1777 current_while_timer_pop(void *data)
1778 {
1779 aggr_port_t *portp = (aggr_port_t *)data;
1780 aggr_lacp_port_t *pl = &portp->lp_lacp;
1781
1782 mutex_enter(&pl->lacp_timer_lock);
1783 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1784 cv_broadcast(&pl->lacp_timer_cv);
1785 mutex_exit(&pl->lacp_timer_lock);
1786 }
1787
1788 static void
current_while_timer_pop_handler(aggr_port_t * portp)1789 current_while_timer_pop_handler(aggr_port_t *portp)
1790 {
1791 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1792
1793 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1794 "pop id=%p\n", portp->lp_linkid,
1795 portp->lp_lacp.current_while_timer.id));
1796
1797 lacp_receive_sm(portp, NULL);
1798 }
1799
1800 /*
1801 * record_Default - Simply copies over administrative values
1802 * to the partner operational values, and sets our state to indicate we
1803 * are using defaulted values.
1804 */
1805 static void
record_Default(aggr_port_t * portp)1806 record_Default(aggr_port_t *portp)
1807 {
1808 aggr_lacp_port_t *pl = &portp->lp_lacp;
1809
1810 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1811
1812 pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1813 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1814 pl->PartnerOperSystem = pl->PartnerAdminSystem;
1815 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1816 pl->PartnerOperKey = pl->PartnerAdminKey;
1817 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1818
1819 pl->ActorOperPortState.bit.defaulted = B_TRUE;
1820 }
1821
1822
1823 /* Returns B_TRUE on sync value changing */
1824 static boolean_t
record_PDU(aggr_port_t * portp,lacp_t * lacp)1825 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1826 {
1827 aggr_grp_t *aggrp = portp->lp_grp;
1828 aggr_lacp_port_t *pl = &portp->lp_lacp;
1829 uint8_t save_sync;
1830
1831 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1832
1833 /*
1834 * Partner Information
1835 */
1836 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1837 pl->PartnerOperPortPriority =
1838 ntohs(lacp->actor_info.port_priority);
1839 pl->PartnerOperSystem = lacp->actor_info.system_id;
1840 pl->PartnerOperSysPriority =
1841 htons(lacp->actor_info.system_priority);
1842 pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1843
1844 /* All state info except for Synchronization */
1845 save_sync = pl->PartnerOperPortState.bit.sync;
1846 pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1847
1848 /* Defaulted set to FALSE */
1849 pl->ActorOperPortState.bit.defaulted = B_FALSE;
1850
1851 /*
1852 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1853 * Partner_System_Priority, Partner_Key, and
1854 * Partner_State.Aggregation) are compared to the
1855 * corresponding operations paramters values for
1856 * the Actor. If these are equal, or if this is
1857 * an individual link, we are synchronized.
1858 */
1859 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1860 (ntohs(lacp->partner_info.port_priority) ==
1861 pl->ActorPortPriority) &&
1862 (ether_cmp(&lacp->partner_info.system_id,
1863 (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1864 (ntohs(lacp->partner_info.system_priority) ==
1865 aggrp->aggr.ActorSystemPriority) &&
1866 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1867 (lacp->partner_info.state.bit.aggregation ==
1868 pl->ActorOperPortState.bit.aggregation)) ||
1869 (!lacp->actor_info.state.bit.aggregation)) {
1870
1871 pl->PartnerOperPortState.bit.sync =
1872 lacp->actor_info.state.bit.sync;
1873 } else {
1874 pl->PartnerOperPortState.bit.sync = B_FALSE;
1875 }
1876
1877 if (save_sync != pl->PartnerOperPortState.bit.sync) {
1878 AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1879 "%d -->%d\n", portp->lp_linkid, save_sync,
1880 pl->PartnerOperPortState.bit.sync));
1881 return (B_TRUE);
1882 } else {
1883 return (B_FALSE);
1884 }
1885 }
1886
1887
1888 /*
1889 * update_selected - If any of the Partner parameters has
1890 * changed from a previous value, then
1891 * unselect the link from the aggregator.
1892 */
1893 static boolean_t
update_selected(aggr_port_t * portp,lacp_t * lacp)1894 update_selected(aggr_port_t *portp, lacp_t *lacp)
1895 {
1896 aggr_lacp_port_t *pl = &portp->lp_lacp;
1897
1898 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1899
1900 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1901 (pl->PartnerOperPortPriority !=
1902 ntohs(lacp->actor_info.port_priority)) ||
1903 (ether_cmp(&pl->PartnerOperSystem,
1904 &lacp->actor_info.system_id) != 0) ||
1905 (pl->PartnerOperSysPriority !=
1906 ntohs(lacp->actor_info.system_priority)) ||
1907 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1908 (pl->PartnerOperPortState.bit.aggregation !=
1909 lacp->actor_info.state.bit.aggregation)) {
1910 AGGR_LACP_DBG(("update_selected:(%d): "
1911 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1912 AGGR_UNSELECTED));
1913
1914 lacp_port_unselect(portp);
1915 return (B_TRUE);
1916 } else {
1917 return (B_FALSE);
1918 }
1919 }
1920
1921
1922 /*
1923 * update_default_selected - If any of the operational Partner parameters
1924 * is different than that of the administrative values
1925 * then unselect the link from the aggregator.
1926 */
1927 static void
update_default_selected(aggr_port_t * portp)1928 update_default_selected(aggr_port_t *portp)
1929 {
1930 aggr_lacp_port_t *pl = &portp->lp_lacp;
1931
1932 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1933
1934 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1935 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1936 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1937 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1938 (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1939 (pl->PartnerOperPortState.bit.aggregation !=
1940 pl->PartnerAdminPortState.bit.aggregation)) {
1941
1942 AGGR_LACP_DBG(("update_default_selected:(%d): "
1943 "selected %d-->%d\n", portp->lp_linkid,
1944 pl->sm.selected, AGGR_UNSELECTED));
1945
1946 lacp_port_unselect(portp);
1947 }
1948 }
1949
1950
1951 /*
1952 * update_NTT - If any of the Partner values in the received LACPDU
1953 * are different than that of the Actor operational
1954 * values then set NTT to true.
1955 */
1956 static void
update_NTT(aggr_port_t * portp,lacp_t * lacp)1957 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1958 {
1959 aggr_grp_t *aggrp = portp->lp_grp;
1960 aggr_lacp_port_t *pl = &portp->lp_lacp;
1961
1962 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1963
1964 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1965 (pl->ActorPortPriority !=
1966 ntohs(lacp->partner_info.port_priority)) ||
1967 (ether_cmp(&aggrp->lg_addr,
1968 &lacp->partner_info.system_id) != 0) ||
1969 (aggrp->aggr.ActorSystemPriority !=
1970 ntohs(lacp->partner_info.system_priority)) ||
1971 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1972 (pl->ActorOperPortState.bit.activity !=
1973 lacp->partner_info.state.bit.activity) ||
1974 (pl->ActorOperPortState.bit.timeout !=
1975 lacp->partner_info.state.bit.timeout) ||
1976 (pl->ActorOperPortState.bit.sync !=
1977 lacp->partner_info.state.bit.sync) ||
1978 (pl->ActorOperPortState.bit.aggregation !=
1979 lacp->partner_info.state.bit.aggregation)) {
1980
1981 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n",
1982 portp->lp_linkid, pl->NTT, B_TRUE));
1983
1984 pl->NTT = B_TRUE;
1985 }
1986 }
1987
1988 /*
1989 * lacp_receive_sm - LACP receive state machine
1990 *
1991 * parameters:
1992 * - portp - instance this applies to.
1993 * - lacp - pointer in the case of a received LACPDU.
1994 * This value is NULL if there is no LACPDU.
1995 *
1996 * invoked:
1997 * - when initialization is needed
1998 * - upon reception of an LACPDU. This is the common case.
1999 * - every time the current_while_timer pops
2000 */
2001 static void
lacp_receive_sm(aggr_port_t * portp,lacp_t * lacp)2002 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
2003 {
2004 boolean_t sync_updated, selected_updated, save_activity;
2005 aggr_lacp_port_t *pl = &portp->lp_lacp;
2006 lacp_receive_state_t oldstate = pl->sm.receive_state;
2007
2008 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
2009
2010 /* LACP_OFF state not in specification so check here. */
2011 if (!pl->sm.lacp_on)
2012 return;
2013
2014 /* figure next state */
2015 if (pl->sm.begin || pl->sm.port_moved) {
2016 pl->sm.receive_state = LACP_INITIALIZE;
2017 } else if (!pl->sm.port_enabled) { /* DL_NOTE_LINK_DOWN */
2018 pl->sm.receive_state = LACP_PORT_DISABLED;
2019 } else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
2020 pl->sm.receive_state =
2021 (pl->sm.receive_state == LACP_PORT_DISABLED) ?
2022 LACP_DISABLED : LACP_PORT_DISABLED;
2023 } else if (lacp != NULL) {
2024 if ((pl->sm.receive_state == LACP_EXPIRED) ||
2025 (pl->sm.receive_state == LACP_DEFAULTED)) {
2026 pl->sm.receive_state = LACP_CURRENT;
2027 }
2028 } else if ((pl->sm.receive_state == LACP_CURRENT) &&
2029 (pl->current_while_timer.id == 0)) {
2030 pl->sm.receive_state = LACP_EXPIRED;
2031 } else if ((pl->sm.receive_state == LACP_EXPIRED) &&
2032 (pl->current_while_timer.id == 0)) {
2033 pl->sm.receive_state = LACP_DEFAULTED;
2034 }
2035
2036 if (!((lacp && (oldstate == LACP_CURRENT) &&
2037 (pl->sm.receive_state == LACP_CURRENT)))) {
2038 AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
2039 portp->lp_linkid, lacp_receive_str[oldstate],
2040 lacp_receive_str[pl->sm.receive_state]));
2041 }
2042
2043 switch (pl->sm.receive_state) {
2044 case LACP_INITIALIZE:
2045 lacp_port_unselect(portp);
2046 record_Default(portp);
2047 pl->ActorOperPortState.bit.expired = B_FALSE;
2048 pl->sm.port_moved = B_FALSE;
2049 pl->sm.receive_state = LACP_PORT_DISABLED;
2050 pl->sm.begin = B_FALSE;
2051 lacp_receive_sm(portp, NULL);
2052 break;
2053
2054 case LACP_PORT_DISABLED:
2055 pl->PartnerOperPortState.bit.sync = B_FALSE;
2056 /*
2057 * Stop current_while_timer in case
2058 * we got here from link down
2059 */
2060 stop_current_while_timer(portp);
2061
2062 if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
2063 pl->sm.receive_state = LACP_DISABLED;
2064 lacp_receive_sm(portp, lacp);
2065 /* We goto LACP_DISABLED state */
2066 break;
2067 } else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2068 pl->sm.receive_state = LACP_EXPIRED;
2069 /*
2070 * FALL THROUGH TO LACP_EXPIRED CASE:
2071 * We have no way of knowing if we get into
2072 * lacp_receive_sm() from a current_while_timer
2073 * expiring as it has never been kicked off yet!
2074 */
2075 } else {
2076 /* We stay in LACP_PORT_DISABLED state */
2077 break;
2078 }
2079 /* LACP_PORT_DISABLED -> LACP_EXPIRED */
2080 /* FALLTHROUGH */
2081
2082 case LACP_EXPIRED:
2083 /*
2084 * Arrives here from LACP_PORT_DISABLED state as well as
2085 * as well as current_while_timer expiring.
2086 */
2087 pl->PartnerOperPortState.bit.sync = B_FALSE;
2088 pl->PartnerOperPortState.bit.timeout = B_TRUE;
2089
2090 pl->ActorOperPortState.bit.expired = B_TRUE;
2091 start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2092 lacp_periodic_sm(portp);
2093 break;
2094
2095 case LACP_DISABLED:
2096 /*
2097 * This is the normal state for recv_sm when LACP_OFF
2098 * is set or the NIC is in half duplex mode.
2099 */
2100 lacp_port_unselect(portp);
2101 record_Default(portp);
2102 pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2103 pl->ActorOperPortState.bit.expired = B_FALSE;
2104 break;
2105
2106 case LACP_DEFAULTED:
2107 /*
2108 * Current_while_timer expired a second time.
2109 */
2110 update_default_selected(portp);
2111 record_Default(portp); /* overwrite Partner Oper val */
2112 pl->ActorOperPortState.bit.expired = B_FALSE;
2113 pl->PartnerOperPortState.bit.sync = B_TRUE;
2114
2115 lacp_selection_logic(portp);
2116 lacp_mux_sm(portp);
2117 break;
2118
2119 case LACP_CURRENT:
2120 /*
2121 * Reception of LACPDU
2122 */
2123
2124 if (!lacp) /* no LACPDU so current_while_timer popped */
2125 break;
2126
2127 AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2128 portp->lp_linkid));
2129
2130 /*
2131 * Validate Actor_Information_Length,
2132 * Partner_Information_Length, Collector_Information_Length,
2133 * and Terminator_Length fields.
2134 */
2135 if (!valid_lacp_pdu(portp, lacp)) {
2136 AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2137 "Invalid LACPDU received\n",
2138 portp->lp_linkid));
2139 break;
2140 }
2141
2142 save_activity = pl->PartnerOperPortState.bit.activity;
2143 selected_updated = update_selected(portp, lacp);
2144 update_NTT(portp, lacp);
2145 sync_updated = record_PDU(portp, lacp);
2146
2147 pl->ActorOperPortState.bit.expired = B_FALSE;
2148
2149 if (selected_updated) {
2150 lacp_selection_logic(portp);
2151 lacp_mux_sm(portp);
2152 } else if (sync_updated) {
2153 lacp_mux_sm(portp);
2154 }
2155
2156 /*
2157 * If the periodic timer value bit has been modified
2158 * or the partner activity bit has been changed then
2159 * we need to respectively:
2160 * - restart the timer with the proper timeout value.
2161 * - possibly enable/disable transmission of LACPDUs.
2162 */
2163 if ((pl->PartnerOperPortState.bit.timeout &&
2164 (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2165 (!pl->PartnerOperPortState.bit.timeout &&
2166 (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2167 (pl->PartnerOperPortState.bit.activity !=
2168 save_activity)) {
2169 lacp_periodic_sm(portp);
2170 }
2171
2172 stop_current_while_timer(portp);
2173 /* Check if we need to transmit an LACPDU */
2174 if (pl->NTT)
2175 lacp_xmit_sm(portp);
2176 start_current_while_timer(portp, 0);
2177
2178 break;
2179 }
2180 }
2181
2182 static void
aggr_set_coll_dist(aggr_port_t * portp,boolean_t enable)2183 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2184 {
2185 mac_perim_handle_t mph;
2186
2187 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2188 portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2189
2190 mac_perim_enter_by_mh(portp->lp_mh, &mph);
2191 if (!enable) {
2192 /*
2193 * Turn OFF Collector_Distributor.
2194 */
2195 portp->lp_collector_enabled = B_FALSE;
2196 aggr_send_port_disable(portp);
2197 goto done;
2198 }
2199
2200 /*
2201 * Turn ON Collector_Distributor.
2202 */
2203
2204 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2205 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2206 /* Port is compatible and can be aggregated */
2207 portp->lp_collector_enabled = B_TRUE;
2208 aggr_send_port_enable(portp);
2209 }
2210
2211 done:
2212 mac_perim_exit(mph);
2213 }
2214
2215 /*
2216 * Because the LACP packet processing needs to enter the aggr's mac perimeter
2217 * and that would potentially cause a deadlock with the thread in which the
2218 * grp/port is deleted, we defer the packet process to a worker thread. Here
2219 * we only enqueue the received Marker or LACPDU for later processing.
2220 */
2221 void
aggr_lacp_rx_enqueue(aggr_port_t * portp,mblk_t * dmp)2222 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2223 {
2224 aggr_grp_t *grp = portp->lp_grp;
2225 lacp_t *lacp;
2226
2227 dmp->b_rptr += sizeof (struct ether_header);
2228
2229 if (MBLKL(dmp) < sizeof (lacp_t)) {
2230 freemsg(dmp);
2231 return;
2232 }
2233
2234 lacp = (lacp_t *)dmp->b_rptr;
2235 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2236 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2237 "Unknown Slow Protocol type %d\n",
2238 portp->lp_linkid, lacp->subtype));
2239 freemsg(dmp);
2240 return;
2241 }
2242
2243 mutex_enter(&grp->lg_lacp_lock);
2244
2245 /*
2246 * If the lg_lacp_done is set, this aggregation is in the process of
2247 * being deleted, return directly.
2248 */
2249 if (grp->lg_lacp_done) {
2250 mutex_exit(&grp->lg_lacp_lock);
2251 freemsg(dmp);
2252 return;
2253 }
2254
2255 if (grp->lg_lacp_tail == NULL) {
2256 grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2257 } else {
2258 grp->lg_lacp_tail->b_next = dmp;
2259 grp->lg_lacp_tail = dmp;
2260 }
2261
2262 /*
2263 * Hold a reference of the port so that the port won't be freed when it
2264 * is removed from the aggr. The b_prev field is borrowed to save the
2265 * port information.
2266 */
2267 AGGR_PORT_REFHOLD(portp);
2268 dmp->b_prev = (mblk_t *)portp;
2269 cv_broadcast(&grp->lg_lacp_cv);
2270 mutex_exit(&grp->lg_lacp_lock);
2271 }
2272
2273 static void
aggr_lacp_rx(mblk_t * dmp)2274 aggr_lacp_rx(mblk_t *dmp)
2275 {
2276 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2277 mac_perim_handle_t mph;
2278 lacp_t *lacp;
2279
2280 dmp->b_prev = NULL;
2281
2282 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2283 if (portp->lp_closing)
2284 goto done;
2285
2286 lacp = (lacp_t *)dmp->b_rptr;
2287 switch (lacp->subtype) {
2288 case LACP_SUBTYPE:
2289 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2290 portp->lp_linkid));
2291
2292 if (!portp->lp_lacp.sm.lacp_on) {
2293 break;
2294 }
2295 lacp_receive_sm(portp, lacp);
2296 break;
2297
2298 case MARKER_SUBTYPE:
2299 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2300 portp->lp_linkid));
2301
2302 if (receive_marker_pdu(portp, dmp) != 0)
2303 break;
2304
2305 /* Send the packet over the first TX ring */
2306 dmp = mac_hwring_send_priv(portp->lp_mch,
2307 portp->lp_tx_rings[0], dmp);
2308 if (dmp != NULL)
2309 freemsg(dmp);
2310 mac_perim_exit(mph);
2311 AGGR_PORT_REFRELE(portp);
2312 return;
2313 }
2314
2315 done:
2316 mac_perim_exit(mph);
2317 AGGR_PORT_REFRELE(portp);
2318 freemsg(dmp);
2319 }
2320
2321 void
aggr_lacp_rx_thread(void * arg)2322 aggr_lacp_rx_thread(void *arg)
2323 {
2324 callb_cpr_t cprinfo;
2325 aggr_grp_t *grp = (aggr_grp_t *)arg;
2326 aggr_port_t *port;
2327 mblk_t *mp, *nextmp;
2328
2329 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2330 "aggr_lacp_rx_thread");
2331
2332 mutex_enter(&grp->lg_lacp_lock);
2333
2334 /*
2335 * Quit the thread if the grp is deleted.
2336 */
2337 while (!grp->lg_lacp_done) {
2338 if ((mp = grp->lg_lacp_head) == NULL) {
2339 CALLB_CPR_SAFE_BEGIN(&cprinfo);
2340 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2341 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2342 continue;
2343 }
2344
2345 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2346 mutex_exit(&grp->lg_lacp_lock);
2347
2348 while (mp != NULL) {
2349 nextmp = mp->b_next;
2350 mp->b_next = NULL;
2351 aggr_lacp_rx(mp);
2352 mp = nextmp;
2353 }
2354 mutex_enter(&grp->lg_lacp_lock);
2355 }
2356
2357 /*
2358 * The grp is being destroyed, simply free all of the LACP messages
2359 * left in the queue which did not have the chance to be processed.
2360 * We cannot use freemsgchain() here since we need to clear the
2361 * b_prev field.
2362 */
2363 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2364 port = (aggr_port_t *)mp->b_prev;
2365 AGGR_PORT_REFRELE(port);
2366 nextmp = mp->b_next;
2367 mp->b_next = NULL;
2368 mp->b_prev = NULL;
2369 freemsg(mp);
2370 }
2371
2372 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2373 grp->lg_lacp_rx_thread = NULL;
2374 cv_broadcast(&grp->lg_lacp_cv);
2375 CALLB_CPR_EXIT(&cprinfo);
2376 thread_exit();
2377 }
2378