altq_red.c revision a29f5e7ca80965b7551a01382485f2e7cba838ec
1/*	$FreeBSD$	*/
2/*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1997-2003
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the Computer Systems
45 *	Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 *    to endorse or promote products derived from this software without
48 *    specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#include "opt_inet.h"
66#ifdef __FreeBSD__
67#include "opt_inet6.h"
68#endif
69#endif /* __FreeBSD__ || __NetBSD__ */
70#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
71
72#include <sys/param.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/systm.h>
77#include <sys/errno.h>
78#if 1 /* ALTQ3_COMPAT */
79#include <sys/sockio.h>
80#include <sys/proc.h>
81#include <sys/kernel.h>
82#ifdef ALTQ_FLOWVALVE
83#include <sys/queue.h>
84#include <sys/time.h>
85#endif
86#endif /* ALTQ3_COMPAT */
87
88#include <net/if.h>
89#include <net/if_var.h>
90
91#include <netinet/in.h>
92#include <netinet/in_systm.h>
93#include <netinet/ip.h>
94#ifdef INET6
95#include <netinet/ip6.h>
96#endif
97
98#include <netpfil/pf/pf.h>
99#include <netpfil/pf/pf_altq.h>
100#include <netpfil/pf/pf_mtag.h>
101#include <net/altq/altq.h>
102#include <net/altq/altq_red.h>
103#ifdef ALTQ3_COMPAT
104#include <net/altq/altq_conf.h>
105#ifdef ALTQ_FLOWVALVE
106#include <net/altq/altq_flowvalve.h>
107#endif
108#endif
109
110/*
111 * ALTQ/RED (Random Early Detection) implementation using 32-bit
112 * fixed-point calculation.
113 *
114 * written by kjc using the ns code as a reference.
115 * you can learn more about red and ns from Sally's home page at
116 * http://www-nrg.ee.lbl.gov/floyd/
117 *
118 * most of the red parameter values are fixed in this implementation
119 * to prevent fixed-point overflow/underflow.
120 * if you change the parameters, watch out for overflow/underflow!
121 *
122 * the parameters used are recommended values by Sally.
123 * the corresponding ns config looks:
124 *	q_weight=0.00195
125 *	minthresh=5 maxthresh=15 queue-size=60
126 *	linterm=30
127 *	dropmech=drop-tail
128 *	bytes=false (can't be handled by 32-bit fixed-point)
129 *	doubleq=false dqthresh=false
130 *	wait=true
131 */
132/*
133 * alternative red parameters for a slow link.
134 *
135 * assume the queue length becomes from zero to L and keeps L, it takes
136 * N packets for q_avg to reach 63% of L.
137 * when q_weight is 0.002, N is about 500 packets.
138 * for a slow link like dial-up, 500 packets takes more than 1 minute!
139 * when q_weight is 0.008, N is about 127 packets.
140 * when q_weight is 0.016, N is about 63 packets.
141 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
142 * are allowed for 0.016.
143 * see Sally's paper for more details.
144 */
145/* normal red parameters */
146#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
147				/* q_weight = 0.00195 */
148
149/* red parameters for a slow link */
150#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
151				/* q_weight = 0.0078125 */
152
153/* red parameters for a very slow link (e.g., dialup) */
154#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
155				/* q_weight = 0.015625 */
156
157/* fixed-point uses 12-bit decimal places */
158#define	FP_SHIFT	12	/* fixed-point shift */
159
160/* red parameters for drop probability */
161#define	INV_P_MAX	10	/* inverse of max drop probability */
162#define	TH_MIN		5	/* min threshold */
163#define	TH_MAX		15	/* max threshold */
164
165#define	RED_LIMIT	60	/* default max queue lenght */
166#define	RED_STATS		/* collect statistics */
167
168/*
169 * our default policy for forced-drop is drop-tail.
170 * (in altq-1.1.2 or earlier, the default was random-drop.
171 * but it makes more sense to punish the cause of the surge.)
172 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
173 */
174
175#ifdef ALTQ3_COMPAT
176#ifdef ALTQ_FLOWVALVE
177/*
178 * flow-valve is an extention to protect red from unresponsive flows
179 * and to promote end-to-end congestion control.
180 * flow-valve observes the average drop rates of the flows that have
181 * experienced packet drops in the recent past.
182 * when the average drop rate exceeds the threshold, the flow is
183 * blocked by the flow-valve.  the trapped flow should back off
184 * exponentially to escape from the flow-valve.
185 */
186#ifdef RED_RANDOM_DROP
187#error "random-drop can't be used with flow-valve!"
188#endif
189#endif /* ALTQ_FLOWVALVE */
190
191/* red_list keeps all red_queue_t's allocated. */
192static red_queue_t *red_list = NULL;
193
194#endif /* ALTQ3_COMPAT */
195
196/* default red parameter values */
197static int default_th_min = TH_MIN;
198static int default_th_max = TH_MAX;
199static int default_inv_pmax = INV_P_MAX;
200
201#ifdef ALTQ3_COMPAT
202/* internal function prototypes */
203static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
204static struct mbuf *red_dequeue(struct ifaltq *, int);
205static int red_request(struct ifaltq *, int, void *);
206static void red_purgeq(red_queue_t *);
207static int red_detach(red_queue_t *);
208#ifdef ALTQ_FLOWVALVE
209static __inline struct fve *flowlist_lookup(struct flowvalve *,
210			 struct altq_pktattr *, struct timeval *);
211static __inline struct fve *flowlist_reclaim(struct flowvalve *,
212					     struct altq_pktattr *);
213static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
214static __inline int fv_p2f(struct flowvalve *, int);
215#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
216static struct flowvalve *fv_alloc(struct red *);
217#endif
218static void fv_destroy(struct flowvalve *);
219static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
220			struct fve **);
221static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
222			 struct fve *);
223#endif
224#endif /* ALTQ3_COMPAT */
225
226/*
227 * red support routines
228 */
229red_t *
230red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
231   int pkttime)
232{
233	red_t	*rp;
234	int	 w, i;
235	int	 npkts_per_sec;
236
237	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
238	if (rp == NULL)
239		return (NULL);
240
241	if (weight == 0)
242		rp->red_weight = W_WEIGHT;
243	else
244		rp->red_weight = weight;
245
246	/* allocate weight table */
247	rp->red_wtab = wtab_alloc(rp->red_weight);
248	if (rp->red_wtab == NULL) {
249		free(rp, M_DEVBUF);
250		return (NULL);
251	}
252
253	rp->red_avg = 0;
254	rp->red_idle = 1;
255
256	if (inv_pmax == 0)
257		rp->red_inv_pmax = default_inv_pmax;
258	else
259		rp->red_inv_pmax = inv_pmax;
260	if (th_min == 0)
261		rp->red_thmin = default_th_min;
262	else
263		rp->red_thmin = th_min;
264	if (th_max == 0)
265		rp->red_thmax = default_th_max;
266	else
267		rp->red_thmax = th_max;
268
269	rp->red_flags = flags;
270
271	if (pkttime == 0)
272		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
273		rp->red_pkttime = 800;
274	else
275		rp->red_pkttime = pkttime;
276
277	if (weight == 0) {
278		/* when the link is very slow, adjust red parameters */
279		npkts_per_sec = 1000000 / rp->red_pkttime;
280		if (npkts_per_sec < 50) {
281			/* up to about 400Kbps */
282			rp->red_weight = W_WEIGHT_2;
283		} else if (npkts_per_sec < 300) {
284			/* up to about 2.4Mbps */
285			rp->red_weight = W_WEIGHT_1;
286		}
287	}
288
289	/* calculate wshift.  weight must be power of 2 */
290	w = rp->red_weight;
291	for (i = 0; w > 1; i++)
292		w = w >> 1;
293	rp->red_wshift = i;
294	w = 1 << rp->red_wshift;
295	if (w != rp->red_weight) {
296		printf("invalid weight value %d for red! use %d\n",
297		       rp->red_weight, w);
298		rp->red_weight = w;
299	}
300
301	/*
302	 * thmin_s and thmax_s are scaled versions of th_min and th_max
303	 * to be compared with avg.
304	 */
305	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
306	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
307
308	/*
309	 * precompute probability denominator
310	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
311	 */
312	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
313			 * rp->red_inv_pmax) << FP_SHIFT;
314
315	microtime(&rp->red_last);
316	return (rp);
317}
318
319void
320red_destroy(red_t *rp)
321{
322#ifdef ALTQ3_COMPAT
323#ifdef ALTQ_FLOWVALVE
324	if (rp->red_flowvalve != NULL)
325		fv_destroy(rp->red_flowvalve);
326#endif
327#endif /* ALTQ3_COMPAT */
328	wtab_destroy(rp->red_wtab);
329	free(rp, M_DEVBUF);
330}
331
332void
333red_getstats(red_t *rp, struct redstats *sp)
334{
335	sp->q_avg		= rp->red_avg >> rp->red_wshift;
336	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
337	sp->drop_cnt		= rp->red_stats.drop_cnt;
338	sp->drop_forced		= rp->red_stats.drop_forced;
339	sp->drop_unforced	= rp->red_stats.drop_unforced;
340	sp->marked_packets	= rp->red_stats.marked_packets;
341}
342
343int
344red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
345    struct altq_pktattr *pktattr)
346{
347	int avg, droptype;
348	int n;
349#ifdef ALTQ3_COMPAT
350#ifdef ALTQ_FLOWVALVE
351	struct fve *fve = NULL;
352
353	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
354		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
355			m_freem(m);
356			return (-1);
357		}
358#endif
359#endif /* ALTQ3_COMPAT */
360
361	avg = rp->red_avg;
362
363	/*
364	 * if we were idle, we pretend that n packets arrived during
365	 * the idle period.
366	 */
367	if (rp->red_idle) {
368		struct timeval now;
369		int t;
370
371		rp->red_idle = 0;
372		microtime(&now);
373		t = (now.tv_sec - rp->red_last.tv_sec);
374		if (t > 60) {
375			/*
376			 * being idle for more than 1 minute, set avg to zero.
377			 * this prevents t from overflow.
378			 */
379			avg = 0;
380		} else {
381			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
382			n = t / rp->red_pkttime - 1;
383
384			/* the following line does (avg = (1 - Wq)^n * avg) */
385			if (n > 0)
386				avg = (avg >> FP_SHIFT) *
387				    pow_w(rp->red_wtab, n);
388		}
389	}
390
391	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
392	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
393	rp->red_avg = avg;		/* save the new value */
394
395	/*
396	 * red_count keeps a tally of arriving traffic that has not
397	 * been dropped.
398	 */
399	rp->red_count++;
400
401	/* see if we drop early */
402	droptype = DTYPE_NODROP;
403	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
404		if (avg >= rp->red_thmax_s) {
405			/* avg >= th_max: forced drop */
406			droptype = DTYPE_FORCED;
407		} else if (rp->red_old == 0) {
408			/* first exceeds th_min */
409			rp->red_count = 1;
410			rp->red_old = 1;
411		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
412				      rp->red_probd, rp->red_count)) {
413			/* mark or drop by red */
414			if ((rp->red_flags & REDF_ECN) &&
415			    mark_ecn(m, pktattr, rp->red_flags)) {
416				/* successfully marked.  do not drop. */
417				rp->red_count = 0;
418#ifdef RED_STATS
419				rp->red_stats.marked_packets++;
420#endif
421			} else {
422				/* unforced drop by red */
423				droptype = DTYPE_EARLY;
424			}
425		}
426	} else {
427		/* avg < th_min */
428		rp->red_old = 0;
429	}
430
431	/*
432	 * if the queue length hits the hard limit, it's a forced drop.
433	 */
434	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
435		droptype = DTYPE_FORCED;
436
437#ifdef RED_RANDOM_DROP
438	/* if successful or forced drop, enqueue this packet. */
439	if (droptype != DTYPE_EARLY)
440		_addq(q, m);
441#else
442	/* if successful, enqueue this packet. */
443	if (droptype == DTYPE_NODROP)
444		_addq(q, m);
445#endif
446	if (droptype != DTYPE_NODROP) {
447		if (droptype == DTYPE_EARLY) {
448			/* drop the incoming packet */
449#ifdef RED_STATS
450			rp->red_stats.drop_unforced++;
451#endif
452		} else {
453			/* forced drop, select a victim packet in the queue. */
454#ifdef RED_RANDOM_DROP
455			m = _getq_random(q);
456#endif
457#ifdef RED_STATS
458			rp->red_stats.drop_forced++;
459#endif
460		}
461#ifdef RED_STATS
462		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
463#endif
464		rp->red_count = 0;
465#ifdef ALTQ3_COMPAT
466#ifdef ALTQ_FLOWVALVE
467		if (rp->red_flowvalve != NULL)
468			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
469#endif
470#endif /* ALTQ3_COMPAT */
471		m_freem(m);
472		return (-1);
473	}
474	/* successfully queued */
475#ifdef RED_STATS
476	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
477#endif
478	return (0);
479}
480
481/*
482 * early-drop probability is calculated as follows:
483 *   prob = p_max * (avg - th_min) / (th_max - th_min)
484 *   prob_a = prob / (2 - count*prob)
485 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
486 * here prob_a increases as successive undrop count increases.
487 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
488 * becomes 1 when (count >= (2 / prob))).
489 */
490int
491drop_early(int fp_len, int fp_probd, int count)
492{
493	int	d;		/* denominator of drop-probability */
494
495	d = fp_probd - count * fp_len;
496	if (d <= 0)
497		/* count exceeds the hard limit: drop or mark */
498		return (1);
499
500	/*
501	 * now the range of d is [1..600] in fixed-point. (when
502	 * th_max-th_min=10 and p_max=1/30)
503	 * drop probability = (avg - TH_MIN) / d
504	 */
505
506	if ((arc4random() % d) < fp_len) {
507		/* drop or mark */
508		return (1);
509	}
510	/* no drop/mark */
511	return (0);
512}
513
514/*
515 * try to mark CE bit to the packet.
516 *    returns 1 if successfully marked, 0 otherwise.
517 */
518int
519mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
520{
521	struct mbuf	*m0;
522	struct pf_mtag	*at;
523	void		*hdr;
524
525	at = pf_find_mtag(m);
526	if (at != NULL) {
527		hdr = at->hdr;
528#ifdef ALTQ3_COMPAT
529	} else if (pktattr != NULL) {
530		af = pktattr->pattr_af;
531		hdr = pktattr->pattr_hdr;
532#endif /* ALTQ3_COMPAT */
533	} else
534		return (0);
535
536	/* verify that pattr_hdr is within the mbuf data */
537	for (m0 = m; m0 != NULL; m0 = m0->m_next)
538		if (((caddr_t)hdr >= m0->m_data) &&
539		    ((caddr_t)hdr < m0->m_data + m0->m_len))
540			break;
541	if (m0 == NULL) {
542		/* ick, tag info is stale */
543		return (0);
544	}
545
546	switch (((struct ip *)hdr)->ip_v) {
547	case IPVERSION:
548		if (flags & REDF_ECN4) {
549			struct ip *ip = hdr;
550			u_int8_t otos;
551			int sum;
552
553			if (ip->ip_v != 4)
554				return (0);	/* version mismatch! */
555
556			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
557				return (0);	/* not-ECT */
558			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
559				return (1);	/* already marked */
560
561			/*
562			 * ecn-capable but not marked,
563			 * mark CE and update checksum
564			 */
565			otos = ip->ip_tos;
566			ip->ip_tos |= IPTOS_ECN_CE;
567			/*
568			 * update checksum (from RFC1624)
569			 *	   HC' = ~(~HC + ~m + m')
570			 */
571			sum = ~ntohs(ip->ip_sum) & 0xffff;
572			sum += (~otos & 0xffff) + ip->ip_tos;
573			sum = (sum >> 16) + (sum & 0xffff);
574			sum += (sum >> 16);  /* add carry */
575			ip->ip_sum = htons(~sum & 0xffff);
576			return (1);
577		}
578		break;
579#ifdef INET6
580	case (IPV6_VERSION >> 4):
581		if (flags & REDF_ECN6) {
582			struct ip6_hdr *ip6 = hdr;
583			u_int32_t flowlabel;
584
585			flowlabel = ntohl(ip6->ip6_flow);
586			if ((flowlabel >> 28) != 6)
587				return (0);	/* version mismatch! */
588			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
589			    (IPTOS_ECN_NOTECT << 20))
590				return (0);	/* not-ECT */
591			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
592			    (IPTOS_ECN_CE << 20))
593				return (1);	/* already marked */
594			/*
595			 * ecn-capable but not marked,  mark CE
596			 */
597			flowlabel |= (IPTOS_ECN_CE << 20);
598			ip6->ip6_flow = htonl(flowlabel);
599			return (1);
600		}
601		break;
602#endif  /* INET6 */
603	}
604
605	/* not marked */
606	return (0);
607}
608
609struct mbuf *
610red_getq(rp, q)
611	red_t *rp;
612	class_queue_t *q;
613{
614	struct mbuf *m;
615
616	if ((m = _getq(q)) == NULL) {
617		if (rp->red_idle == 0) {
618			rp->red_idle = 1;
619			microtime(&rp->red_last);
620		}
621		return NULL;
622	}
623
624	rp->red_idle = 0;
625	return (m);
626}
627
628/*
629 * helper routine to calibrate avg during idle.
630 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
631 * here Wq = 1/weight and the code assumes Wq is close to zero.
632 *
633 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
634 */
635static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
636
637struct wtab *
638wtab_alloc(int weight)
639{
640	struct wtab	*w;
641	int		 i;
642
643	for (w = wtab_list; w != NULL; w = w->w_next)
644		if (w->w_weight == weight) {
645			w->w_refcount++;
646			return (w);
647		}
648
649	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
650	if (w == NULL)
651		return (NULL);
652	w->w_weight = weight;
653	w->w_refcount = 1;
654	w->w_next = wtab_list;
655	wtab_list = w;
656
657	/* initialize the weight table */
658	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
659	for (i = 1; i < 32; i++) {
660		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
661		if (w->w_tab[i] == 0 && w->w_param_max == 0)
662			w->w_param_max = 1 << i;
663	}
664
665	return (w);
666}
667
668int
669wtab_destroy(struct wtab *w)
670{
671	struct wtab	*prev;
672
673	if (--w->w_refcount > 0)
674		return (0);
675
676	if (wtab_list == w)
677		wtab_list = w->w_next;
678	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
679		if (prev->w_next == w) {
680			prev->w_next = w->w_next;
681			break;
682		}
683
684	free(w, M_DEVBUF);
685	return (0);
686}
687
688int32_t
689pow_w(struct wtab *w, int n)
690{
691	int	i, bit;
692	int32_t	val;
693
694	if (n >= w->w_param_max)
695		return (0);
696
697	val = 1 << FP_SHIFT;
698	if (n <= 0)
699		return (val);
700
701	bit = 1;
702	i = 0;
703	while (n) {
704		if (n & bit) {
705			val = (val * w->w_tab[i]) >> FP_SHIFT;
706			n &= ~bit;
707		}
708		i++;
709		bit <<=  1;
710	}
711	return (val);
712}
713
714#ifdef ALTQ3_COMPAT
715/*
716 * red device interface
717 */
718altqdev_decl(red);
719
720int
721redopen(dev, flag, fmt, p)
722	dev_t dev;
723	int flag, fmt;
724#if (__FreeBSD_version > 500000)
725	struct thread *p;
726#else
727	struct proc *p;
728#endif
729{
730	/* everything will be done when the queueing scheme is attached. */
731	return 0;
732}
733
734int
735redclose(dev, flag, fmt, p)
736	dev_t dev;
737	int flag, fmt;
738#if (__FreeBSD_version > 500000)
739	struct thread *p;
740#else
741	struct proc *p;
742#endif
743{
744	red_queue_t *rqp;
745	int err, error = 0;
746
747	while ((rqp = red_list) != NULL) {
748		/* destroy all */
749		err = red_detach(rqp);
750		if (err != 0 && error == 0)
751			error = err;
752	}
753
754	return error;
755}
756
757int
758redioctl(dev, cmd, addr, flag, p)
759	dev_t dev;
760	ioctlcmd_t cmd;
761	caddr_t addr;
762	int flag;
763#if (__FreeBSD_version > 500000)
764	struct thread *p;
765#else
766	struct proc *p;
767#endif
768{
769	red_queue_t *rqp;
770	struct red_interface *ifacep;
771	struct ifnet *ifp;
772	int	error = 0;
773
774	/* check super-user privilege */
775	switch (cmd) {
776	case RED_GETSTATS:
777		break;
778	default:
779#if (__FreeBSD_version > 700000)
780		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
781#elsif (__FreeBSD_version > 400000)
782		if ((error = suser(p)) != 0)
783#else
784		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
785#endif
786			return (error);
787		break;
788	}
789
790	switch (cmd) {
791
792	case RED_ENABLE:
793		ifacep = (struct red_interface *)addr;
794		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
795			error = EBADF;
796			break;
797		}
798		error = altq_enable(rqp->rq_ifq);
799		break;
800
801	case RED_DISABLE:
802		ifacep = (struct red_interface *)addr;
803		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
804			error = EBADF;
805			break;
806		}
807		error = altq_disable(rqp->rq_ifq);
808		break;
809
810	case RED_IF_ATTACH:
811		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
812		if (ifp == NULL) {
813			error = ENXIO;
814			break;
815		}
816
817		/* allocate and initialize red_queue_t */
818		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
819		if (rqp == NULL) {
820			error = ENOMEM;
821			break;
822		}
823		bzero(rqp, sizeof(red_queue_t));
824
825		rqp->rq_q = malloc(sizeof(class_queue_t),
826		       M_DEVBUF, M_WAITOK);
827		if (rqp->rq_q == NULL) {
828			free(rqp, M_DEVBUF);
829			error = ENOMEM;
830			break;
831		}
832		bzero(rqp->rq_q, sizeof(class_queue_t));
833
834		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
835		if (rqp->rq_red == NULL) {
836			free(rqp->rq_q, M_DEVBUF);
837			free(rqp, M_DEVBUF);
838			error = ENOMEM;
839			break;
840		}
841
842		rqp->rq_ifq = &ifp->if_snd;
843		qtail(rqp->rq_q) = NULL;
844		qlen(rqp->rq_q) = 0;
845		qlimit(rqp->rq_q) = RED_LIMIT;
846		qtype(rqp->rq_q) = Q_RED;
847
848		/*
849		 * set RED to this ifnet structure.
850		 */
851		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
852				    red_enqueue, red_dequeue, red_request,
853				    NULL, NULL);
854		if (error) {
855			red_destroy(rqp->rq_red);
856			free(rqp->rq_q, M_DEVBUF);
857			free(rqp, M_DEVBUF);
858			break;
859		}
860
861		/* add this state to the red list */
862		rqp->rq_next = red_list;
863		red_list = rqp;
864		break;
865
866	case RED_IF_DETACH:
867		ifacep = (struct red_interface *)addr;
868		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
869			error = EBADF;
870			break;
871		}
872		error = red_detach(rqp);
873		break;
874
875	case RED_GETSTATS:
876		do {
877			struct red_stats *q_stats;
878			red_t *rp;
879
880			q_stats = (struct red_stats *)addr;
881			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
882					     ALTQT_RED)) == NULL) {
883				error = EBADF;
884				break;
885			}
886
887			q_stats->q_len 	   = qlen(rqp->rq_q);
888			q_stats->q_limit   = qlimit(rqp->rq_q);
889
890			rp = rqp->rq_red;
891			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
892			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
893			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
894			q_stats->drop_forced   = rp->red_stats.drop_forced;
895			q_stats->drop_unforced = rp->red_stats.drop_unforced;
896			q_stats->marked_packets = rp->red_stats.marked_packets;
897
898			q_stats->weight		= rp->red_weight;
899			q_stats->inv_pmax	= rp->red_inv_pmax;
900			q_stats->th_min		= rp->red_thmin;
901			q_stats->th_max		= rp->red_thmax;
902
903#ifdef ALTQ_FLOWVALVE
904			if (rp->red_flowvalve != NULL) {
905				struct flowvalve *fv = rp->red_flowvalve;
906				q_stats->fv_flows    = fv->fv_flows;
907				q_stats->fv_pass     = fv->fv_stats.pass;
908				q_stats->fv_predrop  = fv->fv_stats.predrop;
909				q_stats->fv_alloc    = fv->fv_stats.alloc;
910				q_stats->fv_escape   = fv->fv_stats.escape;
911			} else {
912#endif /* ALTQ_FLOWVALVE */
913				q_stats->fv_flows    = 0;
914				q_stats->fv_pass     = 0;
915				q_stats->fv_predrop  = 0;
916				q_stats->fv_alloc    = 0;
917				q_stats->fv_escape   = 0;
918#ifdef ALTQ_FLOWVALVE
919			}
920#endif /* ALTQ_FLOWVALVE */
921		} while (/*CONSTCOND*/ 0);
922		break;
923
924	case RED_CONFIG:
925		do {
926			struct red_conf *fc;
927			red_t *new;
928			int s, limit;
929
930			fc = (struct red_conf *)addr;
931			if ((rqp = altq_lookup(fc->iface.red_ifname,
932					       ALTQT_RED)) == NULL) {
933				error = EBADF;
934				break;
935			}
936			new = red_alloc(fc->red_weight,
937					fc->red_inv_pmax,
938					fc->red_thmin,
939					fc->red_thmax,
940					fc->red_flags,
941					fc->red_pkttime);
942			if (new == NULL) {
943				error = ENOMEM;
944				break;
945			}
946
947#ifdef __NetBSD__
948			s = splnet();
949#else
950			s = splimp();
951#endif
952			red_purgeq(rqp);
953			limit = fc->red_limit;
954			if (limit < fc->red_thmax)
955				limit = fc->red_thmax;
956			qlimit(rqp->rq_q) = limit;
957			fc->red_limit = limit;	/* write back the new value */
958
959			red_destroy(rqp->rq_red);
960			rqp->rq_red = new;
961
962			splx(s);
963
964			/* write back new values */
965			fc->red_limit = limit;
966			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
967			fc->red_thmin = rqp->rq_red->red_thmin;
968			fc->red_thmax = rqp->rq_red->red_thmax;
969
970		} while (/*CONSTCOND*/ 0);
971		break;
972
973	case RED_SETDEFAULTS:
974		do {
975			struct redparams *rp;
976
977			rp = (struct redparams *)addr;
978
979			default_th_min = rp->th_min;
980			default_th_max = rp->th_max;
981			default_inv_pmax = rp->inv_pmax;
982		} while (/*CONSTCOND*/ 0);
983		break;
984
985	default:
986		error = EINVAL;
987		break;
988	}
989	return error;
990}
991
992static int
993red_detach(rqp)
994	red_queue_t *rqp;
995{
996	red_queue_t *tmp;
997	int error = 0;
998
999	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1000		altq_disable(rqp->rq_ifq);
1001
1002	if ((error = altq_detach(rqp->rq_ifq)))
1003		return (error);
1004
1005	if (red_list == rqp)
1006		red_list = rqp->rq_next;
1007	else {
1008		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1009			if (tmp->rq_next == rqp) {
1010				tmp->rq_next = rqp->rq_next;
1011				break;
1012			}
1013		if (tmp == NULL)
1014			printf("red_detach: no state found in red_list!\n");
1015	}
1016
1017	red_destroy(rqp->rq_red);
1018	free(rqp->rq_q, M_DEVBUF);
1019	free(rqp, M_DEVBUF);
1020	return (error);
1021}
1022
1023/*
1024 * enqueue routine:
1025 *
1026 *	returns: 0 when successfully queued.
1027 *		 ENOBUFS when drop occurs.
1028 */
1029static int
1030red_enqueue(ifq, m, pktattr)
1031	struct ifaltq *ifq;
1032	struct mbuf *m;
1033	struct altq_pktattr *pktattr;
1034{
1035	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036
1037	IFQ_LOCK_ASSERT(ifq);
1038
1039	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1040		return ENOBUFS;
1041	ifq->ifq_len++;
1042	return 0;
1043}
1044
1045/*
1046 * dequeue routine:
1047 *	must be called in splimp.
1048 *
1049 *	returns: mbuf dequeued.
1050 *		 NULL when no packet is available in the queue.
1051 */
1052
1053static struct mbuf *
1054red_dequeue(ifq, op)
1055	struct ifaltq *ifq;
1056	int op;
1057{
1058	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1059	struct mbuf *m;
1060
1061	IFQ_LOCK_ASSERT(ifq);
1062
1063	if (op == ALTDQ_POLL)
1064		return qhead(rqp->rq_q);
1065
1066	/* op == ALTDQ_REMOVE */
1067	m =  red_getq(rqp->rq_red, rqp->rq_q);
1068	if (m != NULL)
1069		ifq->ifq_len--;
1070	return (m);
1071}
1072
1073static int
1074red_request(ifq, req, arg)
1075	struct ifaltq *ifq;
1076	int req;
1077	void *arg;
1078{
1079	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1080
1081	IFQ_LOCK_ASSERT(ifq);
1082
1083	switch (req) {
1084	case ALTRQ_PURGE:
1085		red_purgeq(rqp);
1086		break;
1087	}
1088	return (0);
1089}
1090
1091static void
1092red_purgeq(rqp)
1093	red_queue_t *rqp;
1094{
1095	_flushq(rqp->rq_q);
1096	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1097		rqp->rq_ifq->ifq_len = 0;
1098}
1099
1100#ifdef ALTQ_FLOWVALVE
1101
1102#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
1103#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
1104#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
1105#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
1106#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
1107#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
1108
1109#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
1110#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
1111
1112#define	FV_N			10	/* update fve_f every FV_N packets */
1113
1114#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
1115#define	FV_TTHRESH		3  /* time threshold to delete fve */
1116#define	FV_ALPHA		5  /* extra packet count */
1117
1118#define	FV_STATS
1119
1120#if (__FreeBSD_version > 300000)
1121#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
1122#else
1123#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
1124#endif
1125
1126/*
1127 * Brtt table: 127 entry table to convert drop rate (p) to
1128 * the corresponding bandwidth fraction (f)
1129 * the following equation is implemented to use scaled values,
1130 * fve_p and fve_f, in the fixed point format.
1131 *
1132 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1133 *   f = Brtt(p) / (max_th + alpha)
1134 */
1135#define	BRTT_SIZE	128
1136#define	BRTT_SHIFT	12
1137#define	BRTT_MASK	0x0007f000
1138#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
1139
1140const int brtt_tab[BRTT_SIZE] = {
1141	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1142	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1143	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1144	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1145	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1146	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1147	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1148	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1149	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1150	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1151	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1152	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1153	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1154	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1155	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1156	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1157};
1158
1159static __inline struct fve *
1160flowlist_lookup(fv, pktattr, now)
1161	struct flowvalve *fv;
1162	struct altq_pktattr *pktattr;
1163	struct timeval *now;
1164{
1165	struct fve *fve;
1166	int flows;
1167	struct ip *ip;
1168#ifdef INET6
1169	struct ip6_hdr *ip6;
1170#endif
1171	struct timeval tthresh;
1172
1173	if (pktattr == NULL)
1174		return (NULL);
1175
1176	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1177	flows = 0;
1178	/*
1179	 * search the flow list
1180	 */
1181	switch (pktattr->pattr_af) {
1182	case AF_INET:
1183		ip = (struct ip *)pktattr->pattr_hdr;
1184		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1185			if (fve->fve_lastdrop.tv_sec == 0)
1186				break;
1187			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1188				fve->fve_lastdrop.tv_sec = 0;
1189				break;
1190			}
1191			if (fve->fve_flow.flow_af == AF_INET &&
1192			    fve->fve_flow.flow_ip.ip_src.s_addr ==
1193			    ip->ip_src.s_addr &&
1194			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
1195			    ip->ip_dst.s_addr)
1196				return (fve);
1197			flows++;
1198		}
1199		break;
1200#ifdef INET6
1201	case AF_INET6:
1202		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1203		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1204			if (fve->fve_lastdrop.tv_sec == 0)
1205				break;
1206			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1207				fve->fve_lastdrop.tv_sec = 0;
1208				break;
1209			}
1210			if (fve->fve_flow.flow_af == AF_INET6 &&
1211			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1212					       &ip6->ip6_src) &&
1213			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1214					       &ip6->ip6_dst))
1215				return (fve);
1216			flows++;
1217		}
1218		break;
1219#endif /* INET6 */
1220
1221	default:
1222		/* unknown protocol.  no drop. */
1223		return (NULL);
1224	}
1225	fv->fv_flows = flows;	/* save the number of active fve's */
1226	return (NULL);
1227}
1228
1229static __inline struct fve *
1230flowlist_reclaim(fv, pktattr)
1231	struct flowvalve *fv;
1232	struct altq_pktattr *pktattr;
1233{
1234	struct fve *fve;
1235	struct ip *ip;
1236#ifdef INET6
1237	struct ip6_hdr *ip6;
1238#endif
1239
1240	/*
1241	 * get an entry from the tail of the LRU list.
1242	 */
1243	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1244
1245	switch (pktattr->pattr_af) {
1246	case AF_INET:
1247		ip = (struct ip *)pktattr->pattr_hdr;
1248		fve->fve_flow.flow_af = AF_INET;
1249		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1250		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1251		break;
1252#ifdef INET6
1253	case AF_INET6:
1254		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1255		fve->fve_flow.flow_af = AF_INET6;
1256		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1257		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1258		break;
1259#endif
1260	}
1261
1262	fve->fve_state = Green;
1263	fve->fve_p = 0.0;
1264	fve->fve_f = 0.0;
1265	fve->fve_ifseq = fv->fv_ifseq - 1;
1266	fve->fve_count = 0;
1267
1268	fv->fv_flows++;
1269#ifdef FV_STATS
1270	fv->fv_stats.alloc++;
1271#endif
1272	return (fve);
1273}
1274
1275static __inline void
1276flowlist_move_to_head(fv, fve)
1277	struct flowvalve *fv;
1278	struct fve *fve;
1279{
1280	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1281		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1282		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1283	}
1284}
1285
1286#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
1287/*
1288 * allocate flowvalve structure
1289 */
1290static struct flowvalve *
1291fv_alloc(rp)
1292	struct red *rp;
1293{
1294	struct flowvalve *fv;
1295	struct fve *fve;
1296	int i, num;
1297
1298	num = FV_FLOWLISTSIZE;
1299	fv = malloc(sizeof(struct flowvalve),
1300	       M_DEVBUF, M_WAITOK);
1301	if (fv == NULL)
1302		return (NULL);
1303	bzero(fv, sizeof(struct flowvalve));
1304
1305	fv->fv_fves = malloc(sizeof(struct fve) * num,
1306	       M_DEVBUF, M_WAITOK);
1307	if (fv->fv_fves == NULL) {
1308		free(fv, M_DEVBUF);
1309		return (NULL);
1310	}
1311	bzero(fv->fv_fves, sizeof(struct fve) * num);
1312
1313	fv->fv_flows = 0;
1314	TAILQ_INIT(&fv->fv_flowlist);
1315	for (i = 0; i < num; i++) {
1316		fve = &fv->fv_fves[i];
1317		fve->fve_lastdrop.tv_sec = 0;
1318		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1319	}
1320
1321	/* initialize drop rate threshold in scaled fixed-point */
1322	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1323
1324	/* initialize drop rate to fraction table */
1325	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
1326	       M_DEVBUF, M_WAITOK);
1327	if (fv->fv_p2ftab == NULL) {
1328		free(fv->fv_fves, M_DEVBUF);
1329		free(fv, M_DEVBUF);
1330		return (NULL);
1331	}
1332	/*
1333	 * create the p2f table.
1334	 * (shift is used to keep the precision)
1335	 */
1336	for (i = 1; i < BRTT_SIZE; i++) {
1337		int f;
1338
1339		f = brtt_tab[i] << 8;
1340		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1341	}
1342
1343	return (fv);
1344}
1345#endif
1346
1347static void fv_destroy(fv)
1348	struct flowvalve *fv;
1349{
1350	free(fv->fv_p2ftab, M_DEVBUF);
1351	free(fv->fv_fves, M_DEVBUF);
1352	free(fv, M_DEVBUF);
1353}
1354
1355static __inline int
1356fv_p2f(fv, p)
1357	struct flowvalve	*fv;
1358	int	p;
1359{
1360	int val, f;
1361
1362	if (p >= BRTT_PMAX)
1363		f = fv->fv_p2ftab[BRTT_SIZE-1];
1364	else if ((val = (p & BRTT_MASK)))
1365		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1366	else
1367		f = fv->fv_p2ftab[1];
1368	return (f);
1369}
1370
1371/*
1372 * check if an arriving packet should be pre-dropped.
1373 * called from red_addq() when a packet arrives.
1374 * returns 1 when the packet should be pre-dropped.
1375 * should be called in splimp.
1376 */
1377static int
1378fv_checkflow(fv, pktattr, fcache)
1379	struct flowvalve *fv;
1380	struct altq_pktattr *pktattr;
1381	struct fve **fcache;
1382{
1383	struct fve *fve;
1384	struct timeval now;
1385
1386	fv->fv_ifseq++;
1387	FV_TIMESTAMP(&now);
1388
1389	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1390		/* no matching entry in the flowlist */
1391		return (0);
1392
1393	*fcache = fve;
1394
1395	/* update fraction f for every FV_N packets */
1396	if (++fve->fve_count == FV_N) {
1397		/*
1398		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1399		 */
1400		fve->fve_f =
1401			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1402			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
1403		fve->fve_ifseq = fv->fv_ifseq;
1404		fve->fve_count = 0;
1405	}
1406
1407	/*
1408	 * overpumping test
1409	 */
1410	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1411		int fthresh;
1412
1413		/* calculate a threshold */
1414		fthresh = fv_p2f(fv, fve->fve_p);
1415		if (fve->fve_f > fthresh)
1416			fve->fve_state = Red;
1417	}
1418
1419	if (fve->fve_state == Red) {
1420		/*
1421		 * backoff test
1422		 */
1423		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1424			/* no drop for at least FV_BACKOFFTHRESH sec */
1425			fve->fve_p = 0;
1426			fve->fve_state = Green;
1427#ifdef FV_STATS
1428			fv->fv_stats.escape++;
1429#endif
1430		} else {
1431			/* block this flow */
1432			flowlist_move_to_head(fv, fve);
1433			fve->fve_lastdrop = now;
1434#ifdef FV_STATS
1435			fv->fv_stats.predrop++;
1436#endif
1437			return (1);
1438		}
1439	}
1440
1441	/*
1442	 * p = (1 - Wp) * p
1443	 */
1444	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1445	if (fve->fve_p < 0)
1446		fve->fve_p = 0;
1447#ifdef FV_STATS
1448	fv->fv_stats.pass++;
1449#endif
1450	return (0);
1451}
1452
1453/*
1454 * called from red_addq when a packet is dropped by red.
1455 * should be called in splimp.
1456 */
1457static void fv_dropbyred(fv, pktattr, fcache)
1458	struct flowvalve *fv;
1459	struct altq_pktattr *pktattr;
1460	struct fve *fcache;
1461{
1462	struct fve *fve;
1463	struct timeval now;
1464
1465	if (pktattr == NULL)
1466		return;
1467	FV_TIMESTAMP(&now);
1468
1469	if (fcache != NULL)
1470		/* the fve of this packet is already cached */
1471		fve = fcache;
1472	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1473		fve = flowlist_reclaim(fv, pktattr);
1474
1475	flowlist_move_to_head(fv, fve);
1476
1477	/*
1478	 * update p:  the following line cancels the update
1479	 *	      in fv_checkflow() and calculate
1480	 *	p = Wp + (1 - Wp) * p
1481	 */
1482	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1483
1484	fve->fve_lastdrop = now;
1485}
1486
1487#endif /* ALTQ_FLOWVALVE */
1488
1489#ifdef KLD_MODULE
1490
1491static struct altqsw red_sw =
1492	{"red", redopen, redclose, redioctl};
1493
1494ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1495MODULE_VERSION(altq_red, 1);
1496
1497#endif /* KLD_MODULE */
1498#endif /* ALTQ3_COMPAT */
1499
1500#endif /* ALTQ_RED */
1501