altq_red.c revision c067f1844a59213ac1c6f5426e1ee0d103f33b39
1/*-
2 * Copyright (C) 1997-2003
3 *	Sony Computer Science Laboratories Inc.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27/*-
28 * Copyright (c) 1990-1994 Regents of the University of California.
29 * All rights reserved.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 *    notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 *    notice, this list of conditions and the following disclaimer in the
38 *    documentation and/or other materials provided with the distribution.
39 * 3. All advertising materials mentioning features or use of this software
40 *    must display the following acknowledgement:
41 *	This product includes software developed by the Computer Systems
42 *	Engineering Group at Lawrence Berkeley Laboratory.
43 * 4. Neither the name of the University nor of the Laboratory may be used
44 *    to endorse or promote products derived from this software without
45 *    specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
60 * $FreeBSD$
61 */
62
63#include "opt_altq.h"
64#include "opt_inet.h"
65#include "opt_inet6.h"
66#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
67
68#include <sys/param.h>
69#include <sys/malloc.h>
70#include <sys/mbuf.h>
71#include <sys/socket.h>
72#include <sys/systm.h>
73#include <sys/errno.h>
74#if 1 /* ALTQ3_COMPAT */
75#include <sys/sockio.h>
76#include <sys/proc.h>
77#include <sys/kernel.h>
78#ifdef ALTQ_FLOWVALVE
79#include <sys/queue.h>
80#include <sys/time.h>
81#endif
82#endif /* ALTQ3_COMPAT */
83
84#include <net/if.h>
85#include <net/if_var.h>
86
87#include <netinet/in.h>
88#include <netinet/in_systm.h>
89#include <netinet/ip.h>
90#ifdef INET6
91#include <netinet/ip6.h>
92#endif
93
94#include <netpfil/pf/pf.h>
95#include <netpfil/pf/pf_altq.h>
96#include <netpfil/pf/pf_mtag.h>
97#include <net/altq/altq.h>
98#include <net/altq/altq_red.h>
99#ifdef ALTQ3_COMPAT
100#include <net/altq/altq_conf.h>
101#ifdef ALTQ_FLOWVALVE
102#include <net/altq/altq_flowvalve.h>
103#endif
104#endif
105
106/*
107 * ALTQ/RED (Random Early Detection) implementation using 32-bit
108 * fixed-point calculation.
109 *
110 * written by kjc using the ns code as a reference.
111 * you can learn more about red and ns from Sally's home page at
112 * http://www-nrg.ee.lbl.gov/floyd/
113 *
114 * most of the red parameter values are fixed in this implementation
115 * to prevent fixed-point overflow/underflow.
116 * if you change the parameters, watch out for overflow/underflow!
117 *
118 * the parameters used are recommended values by Sally.
119 * the corresponding ns config looks:
120 *	q_weight=0.00195
121 *	minthresh=5 maxthresh=15 queue-size=60
122 *	linterm=30
123 *	dropmech=drop-tail
124 *	bytes=false (can't be handled by 32-bit fixed-point)
125 *	doubleq=false dqthresh=false
126 *	wait=true
127 */
128/*
129 * alternative red parameters for a slow link.
130 *
131 * assume the queue length becomes from zero to L and keeps L, it takes
132 * N packets for q_avg to reach 63% of L.
133 * when q_weight is 0.002, N is about 500 packets.
134 * for a slow link like dial-up, 500 packets takes more than 1 minute!
135 * when q_weight is 0.008, N is about 127 packets.
136 * when q_weight is 0.016, N is about 63 packets.
137 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
138 * are allowed for 0.016.
139 * see Sally's paper for more details.
140 */
141/* normal red parameters */
142#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
143				/* q_weight = 0.00195 */
144
145/* red parameters for a slow link */
146#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
147				/* q_weight = 0.0078125 */
148
149/* red parameters for a very slow link (e.g., dialup) */
150#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
151				/* q_weight = 0.015625 */
152
153/* fixed-point uses 12-bit decimal places */
154#define	FP_SHIFT	12	/* fixed-point shift */
155
156/* red parameters for drop probability */
157#define	INV_P_MAX	10	/* inverse of max drop probability */
158#define	TH_MIN		5	/* min threshold */
159#define	TH_MAX		15	/* max threshold */
160
#define	RED_LIMIT	60	/* default max queue length */
162#define	RED_STATS		/* collect statistics */
163
164/*
165 * our default policy for forced-drop is drop-tail.
166 * (in altq-1.1.2 or earlier, the default was random-drop.
167 * but it makes more sense to punish the cause of the surge.)
168 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
169 */
170
171#ifdef ALTQ3_COMPAT
172#ifdef ALTQ_FLOWVALVE
173/*
 * flow-valve is an extension to protect red from unresponsive flows
175 * and to promote end-to-end congestion control.
176 * flow-valve observes the average drop rates of the flows that have
177 * experienced packet drops in the recent past.
178 * when the average drop rate exceeds the threshold, the flow is
179 * blocked by the flow-valve.  the trapped flow should back off
180 * exponentially to escape from the flow-valve.
181 */
182#ifdef RED_RANDOM_DROP
183#error "random-drop can't be used with flow-valve!"
184#endif
185#endif /* ALTQ_FLOWVALVE */
186
187/* red_list keeps all red_queue_t's allocated. */
188static red_queue_t *red_list = NULL;
189
190#endif /* ALTQ3_COMPAT */
191
192/* default red parameter values */
193static int default_th_min = TH_MIN;
194static int default_th_max = TH_MAX;
195static int default_inv_pmax = INV_P_MAX;
196
197#ifdef ALTQ3_COMPAT
198/* internal function prototypes */
199static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
200static struct mbuf *red_dequeue(struct ifaltq *, int);
201static int red_request(struct ifaltq *, int, void *);
202static void red_purgeq(red_queue_t *);
203static int red_detach(red_queue_t *);
204#ifdef ALTQ_FLOWVALVE
205static __inline struct fve *flowlist_lookup(struct flowvalve *,
206			 struct altq_pktattr *, struct timeval *);
207static __inline struct fve *flowlist_reclaim(struct flowvalve *,
208					     struct altq_pktattr *);
209static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
210static __inline int fv_p2f(struct flowvalve *, int);
211#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
212static struct flowvalve *fv_alloc(struct red *);
213#endif
214static void fv_destroy(struct flowvalve *);
215static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
216			struct fve **);
217static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
218			 struct fve *);
219#endif
220#endif /* ALTQ3_COMPAT */
221
222/*
223 * red support routines
224 */
225red_t *
226red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
227   int pkttime)
228{
229	red_t	*rp;
230	int	 w, i;
231	int	 npkts_per_sec;
232
233	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
234	if (rp == NULL)
235		return (NULL);
236
237	if (weight == 0)
238		rp->red_weight = W_WEIGHT;
239	else
240		rp->red_weight = weight;
241
242	/* allocate weight table */
243	rp->red_wtab = wtab_alloc(rp->red_weight);
244	if (rp->red_wtab == NULL) {
245		free(rp, M_DEVBUF);
246		return (NULL);
247	}
248
249	rp->red_avg = 0;
250	rp->red_idle = 1;
251
252	if (inv_pmax == 0)
253		rp->red_inv_pmax = default_inv_pmax;
254	else
255		rp->red_inv_pmax = inv_pmax;
256	if (th_min == 0)
257		rp->red_thmin = default_th_min;
258	else
259		rp->red_thmin = th_min;
260	if (th_max == 0)
261		rp->red_thmax = default_th_max;
262	else
263		rp->red_thmax = th_max;
264
265	rp->red_flags = flags;
266
267	if (pkttime == 0)
268		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
269		rp->red_pkttime = 800;
270	else
271		rp->red_pkttime = pkttime;
272
273	if (weight == 0) {
274		/* when the link is very slow, adjust red parameters */
275		npkts_per_sec = 1000000 / rp->red_pkttime;
276		if (npkts_per_sec < 50) {
277			/* up to about 400Kbps */
278			rp->red_weight = W_WEIGHT_2;
279		} else if (npkts_per_sec < 300) {
280			/* up to about 2.4Mbps */
281			rp->red_weight = W_WEIGHT_1;
282		}
283	}
284
285	/* calculate wshift.  weight must be power of 2 */
286	w = rp->red_weight;
287	for (i = 0; w > 1; i++)
288		w = w >> 1;
289	rp->red_wshift = i;
290	w = 1 << rp->red_wshift;
291	if (w != rp->red_weight) {
292		printf("invalid weight value %d for red! use %d\n",
293		       rp->red_weight, w);
294		rp->red_weight = w;
295	}
296
297	/*
298	 * thmin_s and thmax_s are scaled versions of th_min and th_max
299	 * to be compared with avg.
300	 */
301	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
302	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
303
304	/*
305	 * precompute probability denominator
306	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
307	 */
308	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
309			 * rp->red_inv_pmax) << FP_SHIFT;
310
311	microtime(&rp->red_last);
312	return (rp);
313}
314
/*
 * free red state allocated by red_alloc().  releases the reference
 * on the (possibly shared) weight table and, in ALTQ3 compat mode,
 * destroys the attached flow-valve, if any.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
327
328void
329red_getstats(red_t *rp, struct redstats *sp)
330{
331	sp->q_avg		= rp->red_avg >> rp->red_wshift;
332	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
333	sp->drop_cnt		= rp->red_stats.drop_cnt;
334	sp->drop_forced		= rp->red_stats.drop_forced;
335	sp->drop_unforced	= rp->red_stats.drop_unforced;
336	sp->marked_packets	= rp->red_stats.marked_packets;
337}
338
/*
 * enqueue a packet through the RED algorithm.
 *
 * updates the average queue length estimate, then decides among
 * no-drop, unforced (early/probabilistic) drop, and forced drop.
 * on drop the mbuf is freed (or a random victim is freed under
 * RED_RANDOM_DROP) and -1 is returned; on success the packet is
 * placed on q and 0 is returned.
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		/* tell the flow-valve this flow just suffered a red drop */
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
476
477/*
478 * early-drop probability is calculated as follows:
479 *   prob = p_max * (avg - th_min) / (th_max - th_min)
480 *   prob_a = prob / (2 - count*prob)
481 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
482 * here prob_a increases as successive undrop count increases.
483 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
484 * becomes 1 when (count >= (2 / prob))).
485 */
486int
487drop_early(int fp_len, int fp_probd, int count)
488{
489	int	d;		/* denominator of drop-probability */
490
491	d = fp_probd - count * fp_len;
492	if (d <= 0)
493		/* count exceeds the hard limit: drop or mark */
494		return (1);
495
496	/*
497	 * now the range of d is [1..600] in fixed-point. (when
498	 * th_max-th_min=10 and p_max=1/30)
499	 * drop probability = (avg - TH_MIN) / d
500	 */
501
502	if ((arc4random() % d) < fp_len) {
503		/* drop or mark */
504		return (1);
505	}
506	/* no drop/mark */
507	return (0);
508}
509
510/*
511 * try to mark CE bit to the packet.
512 *    returns 1 if successfully marked, 0 otherwise.
513 */
514int
515mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
516{
517	struct mbuf	*m0;
518	struct pf_mtag	*at;
519	void		*hdr;
520
521	at = pf_find_mtag(m);
522	if (at != NULL) {
523		hdr = at->hdr;
524#ifdef ALTQ3_COMPAT
525	} else if (pktattr != NULL) {
526		af = pktattr->pattr_af;
527		hdr = pktattr->pattr_hdr;
528#endif /* ALTQ3_COMPAT */
529	} else
530		return (0);
531
532	/* verify that pattr_hdr is within the mbuf data */
533	for (m0 = m; m0 != NULL; m0 = m0->m_next)
534		if (((caddr_t)hdr >= m0->m_data) &&
535		    ((caddr_t)hdr < m0->m_data + m0->m_len))
536			break;
537	if (m0 == NULL) {
538		/* ick, tag info is stale */
539		return (0);
540	}
541
542	switch (((struct ip *)hdr)->ip_v) {
543	case IPVERSION:
544		if (flags & REDF_ECN4) {
545			struct ip *ip = hdr;
546			u_int8_t otos;
547			int sum;
548
549			if (ip->ip_v != 4)
550				return (0);	/* version mismatch! */
551
552			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
553				return (0);	/* not-ECT */
554			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
555				return (1);	/* already marked */
556
557			/*
558			 * ecn-capable but not marked,
559			 * mark CE and update checksum
560			 */
561			otos = ip->ip_tos;
562			ip->ip_tos |= IPTOS_ECN_CE;
563			/*
564			 * update checksum (from RFC1624)
565			 *	   HC' = ~(~HC + ~m + m')
566			 */
567			sum = ~ntohs(ip->ip_sum) & 0xffff;
568			sum += (~otos & 0xffff) + ip->ip_tos;
569			sum = (sum >> 16) + (sum & 0xffff);
570			sum += (sum >> 16);  /* add carry */
571			ip->ip_sum = htons(~sum & 0xffff);
572			return (1);
573		}
574		break;
575#ifdef INET6
576	case (IPV6_VERSION >> 4):
577		if (flags & REDF_ECN6) {
578			struct ip6_hdr *ip6 = hdr;
579			u_int32_t flowlabel;
580
581			flowlabel = ntohl(ip6->ip6_flow);
582			if ((flowlabel >> 28) != 6)
583				return (0);	/* version mismatch! */
584			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
585			    (IPTOS_ECN_NOTECT << 20))
586				return (0);	/* not-ECT */
587			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
588			    (IPTOS_ECN_CE << 20))
589				return (1);	/* already marked */
590			/*
591			 * ecn-capable but not marked,  mark CE
592			 */
593			flowlabel |= (IPTOS_ECN_CE << 20);
594			ip6->ip6_flow = htonl(flowlabel);
595			return (1);
596		}
597		break;
598#endif  /* INET6 */
599	}
600
601	/* not marked */
602	return (0);
603}
604
605struct mbuf *
606red_getq(rp, q)
607	red_t *rp;
608	class_queue_t *q;
609{
610	struct mbuf *m;
611
612	if ((m = _getq(q)) == NULL) {
613		if (rp->red_idle == 0) {
614			rp->red_idle = 1;
615			microtime(&rp->red_last);
616		}
617		return NULL;
618	}
619
620	rp->red_idle = 0;
621	return (m);
622}
623
624/*
625 * helper routine to calibrate avg during idle.
626 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
627 * here Wq = 1/weight and the code assumes Wq is close to zero.
628 *
629 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
630 */
631static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
632
633struct wtab *
634wtab_alloc(int weight)
635{
636	struct wtab	*w;
637	int		 i;
638
639	for (w = wtab_list; w != NULL; w = w->w_next)
640		if (w->w_weight == weight) {
641			w->w_refcount++;
642			return (w);
643		}
644
645	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
646	if (w == NULL)
647		return (NULL);
648	w->w_weight = weight;
649	w->w_refcount = 1;
650	w->w_next = wtab_list;
651	wtab_list = w;
652
653	/* initialize the weight table */
654	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
655	for (i = 1; i < 32; i++) {
656		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
657		if (w->w_tab[i] == 0 && w->w_param_max == 0)
658			w->w_param_max = 1 << i;
659	}
660
661	return (w);
662}
663
664int
665wtab_destroy(struct wtab *w)
666{
667	struct wtab	*prev;
668
669	if (--w->w_refcount > 0)
670		return (0);
671
672	if (wtab_list == w)
673		wtab_list = w->w_next;
674	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
675		if (prev->w_next == w) {
676			prev->w_next = w->w_next;
677			break;
678		}
679
680	free(w, M_DEVBUF);
681	return (0);
682}
683
684int32_t
685pow_w(struct wtab *w, int n)
686{
687	int	i, bit;
688	int32_t	val;
689
690	if (n >= w->w_param_max)
691		return (0);
692
693	val = 1 << FP_SHIFT;
694	if (n <= 0)
695		return (val);
696
697	bit = 1;
698	i = 0;
699	while (n) {
700		if (n & bit) {
701			val = (val * w->w_tab[i]) >> FP_SHIFT;
702			n &= ~bit;
703		}
704		i++;
705		bit <<=  1;
706	}
707	return (val);
708}
709
710#ifdef ALTQ3_COMPAT
711/*
712 * red device interface
713 */
714altqdev_decl(red);
715
/*
 * device open handler for the red control device.  nothing to set up
 * here: per-interface state is created later via RED_IF_ATTACH when
 * the queueing scheme is attached.  always succeeds.
 */
int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
729
/*
 * device close handler: tear down every red state still on red_list.
 * all states are destroyed regardless of errors; the first detach
 * error encountered (if any) is returned.
 */
int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}
752
753int
754redioctl(dev, cmd, addr, flag, p)
755	dev_t dev;
756	ioctlcmd_t cmd;
757	caddr_t addr;
758	int flag;
759#if (__FreeBSD_version > 500000)
760	struct thread *p;
761#else
762	struct proc *p;
763#endif
764{
765	red_queue_t *rqp;
766	struct red_interface *ifacep;
767	struct ifnet *ifp;
768	int	error = 0;
769
770	/* check super-user privilege */
771	switch (cmd) {
772	case RED_GETSTATS:
773		break;
774	default:
775#if (__FreeBSD_version > 700000)
776		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
777#elsif (__FreeBSD_version > 400000)
778		if ((error = suser(p)) != 0)
779#else
780		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
781#endif
782			return (error);
783		break;
784	}
785
786	switch (cmd) {
787
788	case RED_ENABLE:
789		ifacep = (struct red_interface *)addr;
790		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
791			error = EBADF;
792			break;
793		}
794		error = altq_enable(rqp->rq_ifq);
795		break;
796
797	case RED_DISABLE:
798		ifacep = (struct red_interface *)addr;
799		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
800			error = EBADF;
801			break;
802		}
803		error = altq_disable(rqp->rq_ifq);
804		break;
805
806	case RED_IF_ATTACH:
807		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
808		if (ifp == NULL) {
809			error = ENXIO;
810			break;
811		}
812
813		/* allocate and initialize red_queue_t */
814		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
815		if (rqp == NULL) {
816			error = ENOMEM;
817			break;
818		}
819		bzero(rqp, sizeof(red_queue_t));
820
821		rqp->rq_q = malloc(sizeof(class_queue_t),
822		       M_DEVBUF, M_WAITOK);
823		if (rqp->rq_q == NULL) {
824			free(rqp, M_DEVBUF);
825			error = ENOMEM;
826			break;
827		}
828		bzero(rqp->rq_q, sizeof(class_queue_t));
829
830		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
831		if (rqp->rq_red == NULL) {
832			free(rqp->rq_q, M_DEVBUF);
833			free(rqp, M_DEVBUF);
834			error = ENOMEM;
835			break;
836		}
837
838		rqp->rq_ifq = &ifp->if_snd;
839		qtail(rqp->rq_q) = NULL;
840		qlen(rqp->rq_q) = 0;
841		qlimit(rqp->rq_q) = RED_LIMIT;
842		qtype(rqp->rq_q) = Q_RED;
843
844		/*
845		 * set RED to this ifnet structure.
846		 */
847		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
848				    red_enqueue, red_dequeue, red_request,
849				    NULL, NULL);
850		if (error) {
851			red_destroy(rqp->rq_red);
852			free(rqp->rq_q, M_DEVBUF);
853			free(rqp, M_DEVBUF);
854			break;
855		}
856
857		/* add this state to the red list */
858		rqp->rq_next = red_list;
859		red_list = rqp;
860		break;
861
862	case RED_IF_DETACH:
863		ifacep = (struct red_interface *)addr;
864		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
865			error = EBADF;
866			break;
867		}
868		error = red_detach(rqp);
869		break;
870
871	case RED_GETSTATS:
872		do {
873			struct red_stats *q_stats;
874			red_t *rp;
875
876			q_stats = (struct red_stats *)addr;
877			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
878					     ALTQT_RED)) == NULL) {
879				error = EBADF;
880				break;
881			}
882
883			q_stats->q_len 	   = qlen(rqp->rq_q);
884			q_stats->q_limit   = qlimit(rqp->rq_q);
885
886			rp = rqp->rq_red;
887			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
888			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
889			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
890			q_stats->drop_forced   = rp->red_stats.drop_forced;
891			q_stats->drop_unforced = rp->red_stats.drop_unforced;
892			q_stats->marked_packets = rp->red_stats.marked_packets;
893
894			q_stats->weight		= rp->red_weight;
895			q_stats->inv_pmax	= rp->red_inv_pmax;
896			q_stats->th_min		= rp->red_thmin;
897			q_stats->th_max		= rp->red_thmax;
898
899#ifdef ALTQ_FLOWVALVE
900			if (rp->red_flowvalve != NULL) {
901				struct flowvalve *fv = rp->red_flowvalve;
902				q_stats->fv_flows    = fv->fv_flows;
903				q_stats->fv_pass     = fv->fv_stats.pass;
904				q_stats->fv_predrop  = fv->fv_stats.predrop;
905				q_stats->fv_alloc    = fv->fv_stats.alloc;
906				q_stats->fv_escape   = fv->fv_stats.escape;
907			} else {
908#endif /* ALTQ_FLOWVALVE */
909				q_stats->fv_flows    = 0;
910				q_stats->fv_pass     = 0;
911				q_stats->fv_predrop  = 0;
912				q_stats->fv_alloc    = 0;
913				q_stats->fv_escape   = 0;
914#ifdef ALTQ_FLOWVALVE
915			}
916#endif /* ALTQ_FLOWVALVE */
917		} while (/*CONSTCOND*/ 0);
918		break;
919
920	case RED_CONFIG:
921		do {
922			struct red_conf *fc;
923			red_t *new;
924			int s, limit;
925
926			fc = (struct red_conf *)addr;
927			if ((rqp = altq_lookup(fc->iface.red_ifname,
928					       ALTQT_RED)) == NULL) {
929				error = EBADF;
930				break;
931			}
932			new = red_alloc(fc->red_weight,
933					fc->red_inv_pmax,
934					fc->red_thmin,
935					fc->red_thmax,
936					fc->red_flags,
937					fc->red_pkttime);
938			if (new == NULL) {
939				error = ENOMEM;
940				break;
941			}
942
943			s = splnet();
944			red_purgeq(rqp);
945			limit = fc->red_limit;
946			if (limit < fc->red_thmax)
947				limit = fc->red_thmax;
948			qlimit(rqp->rq_q) = limit;
949			fc->red_limit = limit;	/* write back the new value */
950
951			red_destroy(rqp->rq_red);
952			rqp->rq_red = new;
953
954			splx(s);
955
956			/* write back new values */
957			fc->red_limit = limit;
958			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
959			fc->red_thmin = rqp->rq_red->red_thmin;
960			fc->red_thmax = rqp->rq_red->red_thmax;
961
962		} while (/*CONSTCOND*/ 0);
963		break;
964
965	case RED_SETDEFAULTS:
966		do {
967			struct redparams *rp;
968
969			rp = (struct redparams *)addr;
970
971			default_th_min = rp->th_min;
972			default_th_max = rp->th_max;
973			default_inv_pmax = rp->inv_pmax;
974		} while (/*CONSTCOND*/ 0);
975		break;
976
977	default:
978		error = EINVAL;
979		break;
980	}
981	return error;
982}
983
/*
 * detach a red state from its interface: disable and detach the altq
 * discipline, unlink the state from the global red_list, and free all
 * its resources.  returns 0 on success or the altq_detach() error.
 */
static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	/* unlink rqp from red_list */
	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}
1014
1015/*
1016 * enqueue routine:
1017 *
1018 *	returns: 0 when successfully queued.
1019 *		 ENOBUFS when drop occurs.
1020 */
1021static int
1022red_enqueue(ifq, m, pktattr)
1023	struct ifaltq *ifq;
1024	struct mbuf *m;
1025	struct altq_pktattr *pktattr;
1026{
1027	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1028
1029	IFQ_LOCK_ASSERT(ifq);
1030
1031	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1032		return ENOBUFS;
1033	ifq->ifq_len++;
1034	return 0;
1035}
1036
1037/*
1038 * dequeue routine:
1039 *	must be called in splimp.
1040 *
1041 *	returns: mbuf dequeued.
1042 *		 NULL when no packet is available in the queue.
1043 */
1044
1045static struct mbuf *
1046red_dequeue(ifq, op)
1047	struct ifaltq *ifq;
1048	int op;
1049{
1050	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1051	struct mbuf *m;
1052
1053	IFQ_LOCK_ASSERT(ifq);
1054
1055	if (op == ALTDQ_POLL)
1056		return qhead(rqp->rq_q);
1057
1058	/* op == ALTDQ_REMOVE */
1059	m =  red_getq(rqp->rq_red, rqp->rq_q);
1060	if (m != NULL)
1061		ifq->ifq_len--;
1062	return (m);
1063}
1064
1065static int
1066red_request(ifq, req, arg)
1067	struct ifaltq *ifq;
1068	int req;
1069	void *arg;
1070{
1071	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1072
1073	IFQ_LOCK_ASSERT(ifq);
1074
1075	switch (req) {
1076	case ALTRQ_PURGE:
1077		red_purgeq(rqp);
1078		break;
1079	}
1080	return (0);
1081}
1082
1083static void
1084red_purgeq(rqp)
1085	red_queue_t *rqp;
1086{
1087	_flushq(rqp->rq_q);
1088	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1089		rqp->rq_ifq->ifq_len = 0;
1090}
1091
1092#ifdef ALTQ_FLOWVALVE
1093
1094#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
1095#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
1096#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
1097#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
1098#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
1099#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
1100
1101#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
1102#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
1103
1104#define	FV_N			10	/* update fve_f every FV_N packets */
1105
1106#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
1107#define	FV_TTHRESH		3  /* time threshold to delete fve */
1108#define	FV_ALPHA		5  /* extra packet count */
1109
1110#define	FV_STATS
1111
1112#if (__FreeBSD_version > 300000)
1113#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
1114#else
1115#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
1116#endif
1117
1118/*
1119 * Brtt table: 127 entry table to convert drop rate (p) to
1120 * the corresponding bandwidth fraction (f)
1121 * the following equation is implemented to use scaled values,
1122 * fve_p and fve_f, in the fixed point format.
1123 *
1124 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1125 *   f = Brtt(p) / (max_th + alpha)
1126 */
1127#define	BRTT_SIZE	128
1128#define	BRTT_SHIFT	12
1129#define	BRTT_MASK	0x0007f000
1130#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
1131
/* precomputed Brtt(p) values in fixed-point, indexed by scaled drop
 * rate p (see the equation in the comment block above). */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1150
/*
 * flowlist_lookup: search the flowvalve LRU flow list for an entry whose
 * address pair matches the arriving packet.
 * Returns the matching fve, or NULL when there is no match (including
 * packets with no pktattr or an unknown address family).
 * Side effect: updates fv->fv_flows with the number of live entries seen.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries last dropped-on before tthresh are considered expired */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			/* tv_sec == 0 marks an unused entry; the list is in
			 * LRU order, so everything after it is unused too */
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1220
1221static __inline struct fve *
1222flowlist_reclaim(fv, pktattr)
1223	struct flowvalve *fv;
1224	struct altq_pktattr *pktattr;
1225{
1226	struct fve *fve;
1227	struct ip *ip;
1228#ifdef INET6
1229	struct ip6_hdr *ip6;
1230#endif
1231
1232	/*
1233	 * get an entry from the tail of the LRU list.
1234	 */
1235	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1236
1237	switch (pktattr->pattr_af) {
1238	case AF_INET:
1239		ip = (struct ip *)pktattr->pattr_hdr;
1240		fve->fve_flow.flow_af = AF_INET;
1241		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1242		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1243		break;
1244#ifdef INET6
1245	case AF_INET6:
1246		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1247		fve->fve_flow.flow_af = AF_INET6;
1248		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1249		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1250		break;
1251#endif
1252	}
1253
1254	fve->fve_state = Green;
1255	fve->fve_p = 0.0;
1256	fve->fve_f = 0.0;
1257	fve->fve_ifseq = fv->fv_ifseq - 1;
1258	fve->fve_count = 0;
1259
1260	fv->fv_flows++;
1261#ifdef FV_STATS
1262	fv->fv_stats.alloc++;
1263#endif
1264	return (fve);
1265}
1266
1267static __inline void
1268flowlist_move_to_head(fv, fve)
1269	struct flowvalve *fv;
1270	struct fve *fve;
1271{
1272	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1273		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1274		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1275	}
1276}
1277
#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	/* allocate the fixed-size array of flow entries */
	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	/* thread every entry onto the LRU list; tv_sec == 0 marks unused */
	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * NOTE(review): fv_p2ftab[0] is left uninitialized; fv_p2f() only
	 * reads indices 1..BRTT_SIZE-1, so it is never consumed.
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif
1338
1339static void fv_destroy(fv)
1340	struct flowvalve *fv;
1341{
1342	free(fv->fv_p2ftab, M_DEVBUF);
1343	free(fv->fv_fves, M_DEVBUF);
1344	free(fv, M_DEVBUF);
1345}
1346
1347static __inline int
1348fv_p2f(fv, p)
1349	struct flowvalve	*fv;
1350	int	p;
1351{
1352	int val, f;
1353
1354	if (p >= BRTT_PMAX)
1355		f = fv->fv_p2ftab[BRTT_SIZE-1];
1356	else if ((val = (p & BRTT_MASK)))
1357		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1358	else
1359		f = fv->fv_p2ftab[1];
1360	return (f);
1361}
1362
1363/*
1364 * check if an arriving packet should be pre-dropped.
1365 * called from red_addq() when a packet arrives.
1366 * returns 1 when the packet should be pre-dropped.
1367 * should be called in splimp.
1368 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* per-interface packet sequence counter */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;		/* cache the entry for fv_dropbyred() */

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1444
1445/*
1446 * called from red_addq when a packet is dropped by red.
1447 * should be called in splimp.
1448 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow yet: recycle the LRU tail */
		fve = flowlist_reclaim(fv, pktattr);

	/* a drop makes this flow most-recently-active */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
1478
1479#endif /* ALTQ_FLOWVALVE */
1480
1481#ifdef KLD_MODULE
1482
/* ALTQ3 character-device entry points for configuring RED from userland */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);
1488
1489#endif /* KLD_MODULE */
1490#endif /* ALTQ3_COMPAT */
1491
1492#endif /* ALTQ_RED */
1493