1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <sys/strsubr.h>
26#include <sys/strsun.h>
27#include <sys/param.h>
28#include <sys/sysmacros.h>
29#include <vm/seg_map.h>
30#include <vm/seg_kpm.h>
31#include <sys/condvar_impl.h>
32#include <sys/sendfile.h>
33#include <fs/sockfs/nl7c.h>
34#include <fs/sockfs/nl7curi.h>
35#include <fs/sockfs/socktpi_impl.h>
36
37#include <inet/common.h>
38#include <inet/ip.h>
39#include <inet/ip6.h>
40#include <inet/tcp.h>
41#include <inet/led.h>
42#include <inet/mi.h>
43
44#include <inet/nca/ncadoorhdr.h>
45#include <inet/nca/ncalogd.h>
46#include <inet/nca/ncandd.h>
47
48#include <sys/promif.h>
49
50/*
51 * Some externs:
52 */
53
54extern boolean_t	nl7c_logd_enabled;
55extern void		nl7c_logd_log(uri_desc_t *, uri_desc_t *,
56			    time_t, ipaddr_t);
57extern boolean_t	nl7c_close_addr(struct sonode *);
58extern struct sonode	*nl7c_addr2portso(void *);
59extern uri_desc_t	*nl7c_http_cond(uri_desc_t *, uri_desc_t *);
60
61/*
62 * Various global tuneables:
63 */
64
65clock_t		nl7c_uri_ttl = -1;	/* TTL in seconds (-1 == infinite) */
66
67boolean_t	nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */
68
69uint64_t	nl7c_file_prefetch = 1; /* File cache prefetch pages */
70
71uint64_t	nl7c_uri_max = 0;	/* Maximum bytes (0 == infinite) */
72uint64_t	nl7c_uri_bytes = 0;	/* Bytes of kmem used by URIs */
73
74/*
75 * Locals:
76 */
77
78static int	uri_rd_response(struct sonode *, uri_desc_t *,
79		    uri_rd_t *, boolean_t);
80static int	uri_response(struct sonode *, uri_desc_t *);
81
82/*
83 * HTTP scheme functions called from nl7chttp.c:
84 */
85
86boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
87boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
88boolean_t nl7c_http_cmp(void *, void *);
89mblk_t *nl7c_http_persist(struct sonode *);
90void nl7c_http_free(void *arg);
91void nl7c_http_init(void);
92
93/*
94 * Counters that need to move to kstat and/or be removed:
95 */
96
97volatile uint64_t nl7c_uri_request = 0;
98volatile uint64_t nl7c_uri_hit = 0;
99volatile uint64_t nl7c_uri_pass = 0;
100volatile uint64_t nl7c_uri_miss = 0;
101volatile uint64_t nl7c_uri_temp = 0;
102volatile uint64_t nl7c_uri_more = 0;
103volatile uint64_t nl7c_uri_data = 0;
104volatile uint64_t nl7c_uri_sendfilev = 0;
105volatile uint64_t nl7c_uri_reclaim_calls = 0;
106volatile uint64_t nl7c_uri_reclaim_cnt = 0;
107volatile uint64_t nl7c_uri_pass_urifail = 0;
108volatile uint64_t nl7c_uri_pass_dupbfail = 0;
109volatile uint64_t nl7c_uri_more_get = 0;
110volatile uint64_t nl7c_uri_pass_method = 0;
111volatile uint64_t nl7c_uri_pass_option = 0;
112volatile uint64_t nl7c_uri_more_eol = 0;
113volatile uint64_t nl7c_uri_more_http = 0;
114volatile uint64_t nl7c_uri_pass_http = 0;
115volatile uint64_t nl7c_uri_pass_addfail = 0;
116volatile uint64_t nl7c_uri_pass_temp = 0;
117volatile uint64_t nl7c_uri_expire = 0;
118volatile uint64_t nl7c_uri_purge = 0;
119volatile uint64_t nl7c_uri_NULL1 = 0;
120volatile uint64_t nl7c_uri_NULL2 = 0;
121volatile uint64_t nl7c_uri_close = 0;
122volatile uint64_t nl7c_uri_temp_close = 0;
123volatile uint64_t nl7c_uri_free = 0;
124volatile uint64_t nl7c_uri_temp_free = 0;
125volatile uint64_t nl7c_uri_temp_mk = 0;
126volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
127
128/*
129 * Various kmem_cache_t's:
130 */
131
132kmem_cache_t *nl7c_uri_kmc;
133kmem_cache_t *nl7c_uri_rd_kmc;
134static kmem_cache_t *uri_desb_kmc;
135static kmem_cache_t *uri_segmap_kmc;
136
137static void uri_kmc_reclaim(void *);
138
139static void nl7c_uri_reclaim(void);
140
141/*
142 * The URI hash is a dynamically sized A/B bucket hash; when the current
143 * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
144 * the next P2Ps[] size is created.
145 *
146 * All lookups are done in the current hash, then in the new hash (if any).
147 * If there is a new hash, then whenever a current hash bucket chain is
148 * examined any uri_desc_t members found are migrated to the new hash;
149 * once the last uri_desc_t has been migrated the new hash becomes the
150 * current hash and the previous current hash is freed, leaving a single
151 * hash.
152 *
153 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
154 * arrays; only after acquiring the uri_hash_access lock (for READER or
155 * WRITER) and then acquiring the uri_hash_t.lock are the uri_hash_t
156 * and all linked uri_desc_t.hash members protected. Note, a REF_HOLD()
157 * is placed on all uri_desc_t uri_hash_t list members.
158 *
159 * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
160 * access and WRITER for write access. Note, WRITER is only required for
161 * hash geometry changes.
162 *
163 * uri_hash_which - which uri_hash_ab[] is the current hash.
164 *
165 * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
166 *
167 * uri_hash_sz[] - the size for each uri_hash_ab[].
168 *
169 * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
170 *
171 * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
172 * a new uri_hash_ab[] needs to be created.
173 *
174 * uri_hash_ab[] - the uri_hash_t entries.
175 *
176 * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
177 */
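
/*
 * Illustrative sketch (editorial, not part of the build; "search" is a
 * placeholder for the chain walk done in uri_lookup() below): lookups
 * always probe the current hash first and the new hash, if any, second:
 *
 *	rw_enter(&uri_hash_access, RW_READER);
 *	cur = uri_hash_which;
 *	new = cur ? 0 : 1;
 *	uri = search(uri_hash_ab[cur], hvalue);
 *	if (uri == NULL && uri_hash_ab[new] != NULL)
 *		uri = search(uri_hash_ab[new], hvalue);
 *	rw_exit(&uri_hash_access);
 */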
178
179typedef struct uri_hash_s {
180	struct uri_desc_s	*list;		/* List of uri_t(s) */
181	kmutex_t		lock;
182} uri_hash_t;
183
184#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
185#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */
186
187static krwlock_t	uri_hash_access;
188static uint32_t		uri_hash_which = 0;
189static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0};
190static uint32_t		uri_hash_sz[2] = {0, 0};
191static uint32_t		uri_hash_cnt[2] = {0, 0};
192static uint32_t		uri_hash_overflow[2] = {0, 0};
193static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};
194static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL};
195
196/*
197 * Primes for N of 3 - 24 where P is the first prime less than
198 * (2^(N-1))+(2^(N-2)); these primes have been found useful for prime sized hash tables.
199 */
200
201static const int P2Ps[] = {
202	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
203	6143, 12281, 24571, 49139, 98299, 196597, 393209,
204	786431, 1572853, 3145721, 6291449, 12582893, 0};
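
/*
 * Worked example (editorial): with URI_HASH_N_INIT == 9 the first hash
 * has P2Ps[9] == 383 buckets and is considered overflowed once it holds
 * 383 * URI_HASH_AVRG == 1915 URIs, at which point the next add creates
 * a new 761 bucket hash; subsequent growth steps are 1531, 3067, and so
 * on up the P2Ps[] table.
 */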
205
206/*
207 * Hash macros:
208 *
209 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
210 *    hex multichar of the format "%HH" pointed to by *cp to a char and
211 *    return it in c, *ep points past the end of the buffer, on return
212 *    *cp will point to the last char consumed.
213 *
214 *    URI_HASH(unsigned hv, char *cp, char *ep) - hash the char(s) from
215 *    *cp to *ep into the unsigned hv, neither cp nor ep is modified.
216 *
217 *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
218 *    a hash index 0 - (uri_hash_sz[which] - 1).
219 *
220 *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
221 *    uri_desc_t members from hash from to hash to.
222 *
223 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
224 *    *uri which is a member of the uri_hash_t *hp list with a previous
225 *    list member of *puri for the uri_hash_ab[] cur. After unlinking,
226 *    check for the cur hash being empty and, if so, make new the cur.
227 *    Note, as this macro can change a hash chain it needs to be run with
228 *    uri_hash_access held as RW_WRITER; further, as it can change the new
229 *    hash to cur, any subsequent access to the hash state must either drop
230 *    all locks and start over or first make sure the global state is still
231 *    consistent.
232 */
233
234#define	H2A(cp, ep, c) {						\
235	int	_h = 2;							\
236	int	_n = 0;							\
237	char	_hc;							\
238									\
239	while (_h > 0 && ++(cp) < (ep)) {				\
240		if (_h == 1)						\
241			_n *= 0x10;					\
242		_hc = *(cp);						\
243		if (_hc >= '0' && _hc <= '9')				\
244			_n += _hc - '0';				\
245		else if (_hc >= 'a' && _hc <= 'f')			\
246			_n += _hc - 'W';				\
247		else if (_hc >= 'A' && _hc <= 'F')			\
248			_n += _hc - '7';				\
249		_h--;							\
250	}								\
251	(c) = _n;							\
252}
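
/*
 * For example (editorial illustration): with cp pointing at the '%' of
 * the escaped octet "%2F" and ep pointing past the end of the buffer,
 *
 *	char c;
 *	H2A(cp, ep, c);
 *
 * consumes the two hex digits, leaves cp pointing at the 'F' (the last
 * char consumed), and sets c to 0x2F, i.e. '/'.
 */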
253
254#define	URI_HASH(hv, cp, ep) {						\
255	char	*_s = (cp);						\
256	char	_c;							\
257									\
258	while (_s < (ep)) {						\
259		if ((_c = *_s) == '%') {				\
260			H2A(_s, (ep), _c);				\
261		}							\
262		CHASH(hv, _c);						\
263		_s++;							\
264	}								\
265}
266
267#define	URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)])
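
/*
 * For example (editorial illustration): with the initial geometry
 * uri_hash_sz[cur] == P2Ps[URI_HASH_N_INIT] == 383, a URI whose
 * accumulated hash value is 1000 lands in bucket 1000 % 383 == 234:
 *
 *	uint32_t hix = uri->hvalue;
 *	URI_HASH_IX(hix, cur);
 *	hp = &uri_hash_ab[cur][hix];
 */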
268
269#define	URI_HASH_MIGRATE(from, hp, to) {				\
270	uri_desc_t	*_nuri;						\
271	uint32_t	_nhix;						\
272	uri_hash_t	*_nhp;						\
273									\
274	mutex_enter(&(hp)->lock);					\
275	while ((_nuri = (hp)->list) != NULL) {				\
276		(hp)->list = _nuri->hash;				\
277		atomic_dec_32(&uri_hash_cnt[(from)]);		\
278		atomic_inc_32(&uri_hash_cnt[(to)]);			\
279		_nhix = _nuri->hvalue;					\
280		URI_HASH_IX(_nhix, to);					\
281		_nhp = &uri_hash_ab[(to)][_nhix];			\
282		mutex_enter(&_nhp->lock);				\
283		_nuri->hash = _nhp->list;				\
284		_nhp->list = _nuri;					\
285		_nuri->hit = 0;						\
286		mutex_exit(&_nhp->lock);				\
287	}								\
288	mutex_exit(&(hp)->lock);					\
289}
290
291#define	URI_HASH_UNLINK(cur, new, hp, puri, uri) {			\
292	if ((puri) != NULL) {						\
293		(puri)->hash = (uri)->hash;				\
294	} else {							\
295		(hp)->list = (uri)->hash;				\
296	}								\
297	if (atomic_dec_32_nv(&uri_hash_cnt[(cur)]) == 0 &&		\
298	    uri_hash_ab[(new)] != NULL) {				\
299		kmem_free(uri_hash_ab[cur],				\
300		    sizeof (uri_hash_t) * uri_hash_sz[cur]);		\
301		uri_hash_ab[(cur)] = NULL;				\
302		uri_hash_lru[(cur)] = NULL;				\
303		uri_hash_which = (new);					\
304	} else {							\
305		uri_hash_lru[(cur)] = (hp);				\
306	}								\
307}
308
309void
310nl7c_uri_init(void)
311{
312	uint32_t	cur = uri_hash_which;
313
314	rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
315
316	uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
317	uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
318	uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
319	    KM_SLEEP);
320	uri_hash_lru[cur] = uri_hash_ab[cur];
321
322	nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
323	    0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
324
325	nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
326	    sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
327
328	uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
329	    sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
330
331	uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
332	    sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
333
334	nl7c_http_init();
335}
336
337#define	CV_SZ	16
338
339void
340nl7c_mi_report_hash(mblk_t *mp)
341{
342	uri_hash_t	*hp, *pend;
343	uri_desc_t	*uri;
344	uint32_t	cur;
345	uint32_t	new;
346	int		n, nz, tot;
347	uint32_t	cv[CV_SZ + 1];
348
349	rw_enter(&uri_hash_access, RW_READER);
350	cur = uri_hash_which;
351	new = cur ? 0 : 1;
352next:
353	for (n = 0; n <= CV_SZ; n++)
354		cv[n] = 0;
355	nz = 0;
356	tot = 0;
357	hp = &uri_hash_ab[cur][0];
358	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
359	while (hp < pend) {
360		n = 0;
361		for (uri = hp->list; uri != NULL; uri = uri->hash) {
362			n++;
363		}
364		tot += n;
365		if (n > 0)
366			nz++;
367		if (n > CV_SZ)
368			n = CV_SZ;
369		cv[n]++;
370		hp++;
371	}
372
373	(void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
374	    "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
375	    uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
376	(void) mi_mpprintf(mp, "Free=%d", cv[0]);
377	for (n = 1; n < CV_SZ; n++) {
378		int	pn = 0;
379		char	pv[5];
380		char	*pp = pv;
381
382		for (pn = n; pn < 1000; pn *= 10)
383			*pp++ = ' ';
384		*pp = 0;
385		(void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
386	}
387	(void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
388
389	if (cur != new && uri_hash_ab[new] != NULL) {
390		cur = new;
391		goto next;
392	}
393	rw_exit(&uri_hash_access);
394}
395
396void
397nl7c_mi_report_uri(mblk_t *mp)
398{
399	uri_hash_t	*hp;
400	uri_desc_t	*uri;
401	uint32_t	cur;
402	uint32_t	new;
403	int		ix;
404	int		ret = 0;
405	char		sc;
406
407	rw_enter(&uri_hash_access, RW_READER);
408	cur = uri_hash_which;
409	new = cur ? 0 : 1;
410next:
411	for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
412		hp = &uri_hash_ab[cur][ix];
413		mutex_enter(&hp->lock);
414		uri = hp->list;
415		while (uri != NULL) {
416			sc = *(uri->path.ep);
417			*(uri->path.ep) = 0;
418			ret = mi_mpprintf(mp, "%s: %d %d %d",
419			    uri->path.cp, (int)uri->resplen,
420			    (int)uri->respclen, (int)uri->count);
421			*(uri->path.ep) = sc;
422			if (ret == -1) break;
423			uri = uri->hash;
424		}
425		mutex_exit(&hp->lock);
426		if (ret == -1) break;
427	}
428	if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
429		cur = new;
430		goto next;
431	}
432	rw_exit(&uri_hash_access);
433}
434
435/*
436 * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
437 * free all resources contained in the uri_desc_t. Note, the uri_desc_t
438 * will be freed by REF_RELE() on return.
439 */
440
441void
442nl7c_uri_inactive(uri_desc_t *uri)
443{
444	int64_t	 bytes = 0;
445
446	if (uri->tail) {
447		uri_rd_t *rdp = &uri->response;
448		uri_rd_t *free = NULL;
449
450		while (rdp) {
451			if (rdp->off == -1) {
452				bytes += rdp->sz;
453				kmem_free(rdp->data.kmem, rdp->sz);
454			} else {
455				VN_RELE(rdp->data.vnode);
456			}
457			rdp = rdp->next;
458			if (free != NULL) {
459				kmem_cache_free(nl7c_uri_rd_kmc, free);
460			}
461			free = rdp;
462		}
463	}
464	if (bytes) {
465		atomic_add_64(&nl7c_uri_bytes, -bytes);
466	}
467	if (uri->scheme != NULL) {
468		nl7c_http_free(uri->scheme);
469	}
470	if (uri->reqmp) {
471		freeb(uri->reqmp);
472	}
473}
474
475/*
476 * The reclaim is called by the kmem subsystem when kmem is running
477 * low. More work is needed to determine the best reclaim policy; for
478 * now we just manipulate the nl7c_uri_max global maximum bytes threshold
479 * value using a simple arithmetic backoff of the value every time this
480 * function is called, then call nl7c_uri_reclaim() to enforce it.
481 *
482 * Note, this value remains in place and is enforced for all subsequent
483 * URI request/response processing.
484 *
485 * Note, nl7c_uri_max is currently initialized to 0, i.e. infinite, so the
486 * first call here sets it to the current nl7c_uri_bytes value and then
487 * backs off from there.
488 *
489 * XXX how do we determine when to increase nl7c_uri_max ???
490 */
491
492/*ARGSUSED*/
493static void
494uri_kmc_reclaim(void *arg)
495{
496	uint64_t new_max;
497
498	if ((new_max = nl7c_uri_max) == 0) {
499		/* Currently infinite, initialize to current bytes used */
500		nl7c_uri_max = nl7c_uri_bytes;
501		new_max = nl7c_uri_bytes;
502	}
503	if (new_max > 1) {
504		/* Lower nl7c_uri_max to 93% of its current value */
505		new_max >>= 1;			/* 50% */
506		new_max += (new_max >> 1);	/* 75% */
507		new_max += (new_max >> 2);	/* 93% */
508		if (new_max < nl7c_uri_max)
509			nl7c_uri_max = new_max;
510		else
511			nl7c_uri_max = 1;
512	}
513	nl7c_uri_reclaim();
514}
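
/*
 * Worked example of the backoff above (editorial): if nl7c_uri_max is
 * 1048576 (1MB) when the kmem reclaim call-back fires,
 *
 *	new_max >>= 1;			524288	(50%)
 *	new_max += (new_max >> 1);	786432	(75%)
 *	new_max += (new_max >> 2);	983040	(93.75%)
 *
 * so nl7c_uri_max is lowered to 983040, i.e. 15/16 of its previous
 * value, and nl7c_uri_reclaim() is then called to enforce the new
 * threshold.
 */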
515
516/*
517 * Delete a uri_desc_t from the URI hash.
518 */
519
520static void
521uri_delete(uri_desc_t *del)
522{
523	uint32_t	hix;
524	uri_hash_t	*hp;
525	uri_desc_t	*uri;
526	uri_desc_t	*puri;
527	uint32_t	cur;
528	uint32_t	new;
529
530	ASSERT(del->hash != URI_TEMP);
531	rw_enter(&uri_hash_access, RW_WRITER);
532	cur = uri_hash_which;
533	new = cur ? 0 : 1;
534next:
535	puri = NULL;
536	hix = del->hvalue;
537	URI_HASH_IX(hix, cur);
538	hp = &uri_hash_ab[cur][hix];
539	for (uri = hp->list; uri != NULL; uri = uri->hash) {
540		if (uri != del) {
541			puri = uri;
542			continue;
543		}
544		/*
545		 * Found the URI, unlink from the hash chain,
546		 * drop locks, ref release it.
547		 */
548		URI_HASH_UNLINK(cur, new, hp, puri, uri);
549		rw_exit(&uri_hash_access);
550		REF_RELE(uri);
551		return;
552	}
553	if (cur != new && uri_hash_ab[new] != NULL) {
554		/*
555		 * Not found in current hash and have a new hash so
556		 * check the new hash next.
557		 */
558		cur = new;
559		goto next;
560	}
561	rw_exit(&uri_hash_access);
562}
563
564/*
565 * Add a uri_desc_t to the URI hash.
566 */
567
568static void
569uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
570{
571	uint32_t	hix;
572	uri_hash_t	*hp;
573	uint32_t	cur = uri_hash_which;
574	uint32_t	new = cur ? 0 : 1;
575
576	/*
577	 * Caller of uri_add() must hold the uri_hash_access rwlock.
578	 */
579	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
580	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
581	/*
582	 * uri_add() always succeeds so add a hash ref to the URI now.
583	 */
584	REF_HOLD(uri);
585again:
586	hix = uri->hvalue;
587	URI_HASH_IX(hix, cur);
588	if (uri_hash_ab[new] == NULL &&
589	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
590		/*
591		 * Easy case, no new hash and current hasn't overflowed,
592		 * add URI to current hash and return.
593		 *
594		 * Note, the check of uri_hash_cnt[] above isn't done
595		 * atomically, i.e. multiple threads can be in this code
596		 * as RW_READER and update the cnt[], but this isn't a
597		 * problem as the check is only advisory.
598		 */
599	fast:
600		atomic_inc_32(&uri_hash_cnt[cur]);
601		hp = &uri_hash_ab[cur][hix];
602		mutex_enter(&hp->lock);
603		uri->hash = hp->list;
604		hp->list = uri;
605		mutex_exit(&hp->lock);
606		rw_exit(&uri_hash_access);
607		return;
608	}
609	if (uri_hash_ab[new] == NULL) {
610		/*
611		 * Need a new a or b hash, if not already RW_WRITER
612		 * try to upgrade our lock to writer.
613		 */
614		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
615			/*
616			 * Upgrade failed, we can't simply exit and reenter
617			 * the lock as after the exit and before the reenter
618			 * the whole world can change, so just wait for the
619			 * writer lock then do everything again.
620			 */
621			if (nonblocking) {
622				/*
623				 * Can't block, use fast-path above.
624				 *
625				 * XXX should have a background thread to
626				 * handle new ab[] in this case so as to
627				 * not overflow the cur hash too much.
628				 */
629				goto fast;
630			}
631			rw_exit(&uri_hash_access);
632			rwlock = RW_WRITER;
633			rw_enter(&uri_hash_access, rwlock);
634			cur = uri_hash_which;
635			new = cur ? 0 : 1;
636			goto again;
637		}
638		rwlock = RW_WRITER;
639		if (uri_hash_ab[new] == NULL) {
640			/*
641			 * Still need a new hash, allocate and initialize
642			 * the new hash.
643			 */
644			uri_hash_n[new] = uri_hash_n[cur] + 1;
645			if (uri_hash_n[new] == 0) {
646				/*
647				 * No larger P2Ps[] value so use current,
648				 * i.e. 2 of the largest are better than 1 ?
649				 */
650				uri_hash_n[new] = uri_hash_n[cur];
651				cmn_err(CE_NOTE, "NL7C: hash index overflow");
652			}
653			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
654			ASSERT(uri_hash_cnt[new] == 0);
655			uri_hash_overflow[new] = uri_hash_sz[new] *
656			    URI_HASH_AVRG;
657			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
658			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
659			    KM_SLEEP);
660			if (uri_hash_ab[new] == NULL) {
661				/*
662				 * Alloc failed, use fast-path above.
663				 *
664				 * XXX should have a background thread to
665				 * handle new ab[] in this case so as to
666				 * not overflow the cur hash too much.
667				 */
668				goto fast;
669			}
670			uri_hash_lru[new] = uri_hash_ab[new];
671		}
672	}
673	/*
674	 * Hashed against current hash so migrate any current hash chain
675	 * members, if any.
676	 *
677	 * Note, the hash chain list can be checked for a non-empty list
678	 * outside of the hash chain list lock as the hash chain struct
679	 * can't be destroyed while the uri_hash_access rwlock is held; the
680	 * worst case is that a non-empty list is found and after acquiring
681	 * the lock another thread beats us to it (i.e. migrated the list).
682	 */
683	hp = &uri_hash_ab[cur][hix];
684	if (hp->list != NULL) {
685		URI_HASH_MIGRATE(cur, hp, new);
686	}
687	/*
688	 * If new hash has overflowed before current hash has been
689	 * completely migrated then walk all current hash chains and
690	 * migrate list members now.
691	 */
692	if (atomic_inc_32_nv(&uri_hash_cnt[new]) >= uri_hash_overflow[new]) {
693		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
694			hp = &uri_hash_ab[cur][hix];
695			if (hp->list != NULL) {
696				URI_HASH_MIGRATE(cur, hp, new);
697			}
698		}
699	}
700	/*
701	 * Add URI to new hash.
702	 */
703	hix = uri->hvalue;
704	URI_HASH_IX(hix, new);
705	hp = &uri_hash_ab[new][hix];
706	mutex_enter(&hp->lock);
707	uri->hash = hp->list;
708	hp->list = uri;
709	mutex_exit(&hp->lock);
710	/*
711	 * Last, check to see if last cur hash chain has been
712	 * migrated, if so free cur hash and make new hash cur.
713	 */
714	if (uri_hash_cnt[cur] == 0) {
715		/*
716		 * If we don't already hold the uri_hash_access rwlock for
717		 * RW_WRITER, try to upgrade to RW_WRITER and, if successful,
718		 * check again to see if the free still needs to be done.
719		 */
720		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
721		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != NULL) {
722			kmem_free(uri_hash_ab[cur],
723			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
724			uri_hash_ab[cur] = NULL;
725			uri_hash_lru[cur] = NULL;
726			uri_hash_which = new;
727		}
728	}
729	rw_exit(&uri_hash_access);
730}
731
732/*
733 * Look up a uri_desc_t in the URI hash; if found, the found uri_desc_t is
734 * returned with a REF_HOLD() placed on it and the caller frees the request
735 * uri_desc_t. Else, if add is B_TRUE, use the request URI to create a new
736 * hash entry. Else, if add is B_FALSE ...
737 */
738
739static uri_desc_t *
740uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
741{
742	uint32_t	hix;
743	uri_hash_t	*hp;
744	uri_desc_t	*uri;
745	uri_desc_t	*puri;
746	uint32_t	cur;
747	uint32_t	new;
748	char		*rcp = ruri->path.cp;
749	char		*rep = ruri->path.ep;
750
751again:
752	rw_enter(&uri_hash_access, RW_READER);
753	cur = uri_hash_which;
754	new = cur ? 0 : 1;
755nexthash:
756	puri = NULL;
757	hix = ruri->hvalue;
758	URI_HASH_IX(hix, cur);
759	hp = &uri_hash_ab[cur][hix];
760	mutex_enter(&hp->lock);
761	for (uri = hp->list; uri != NULL; uri = uri->hash) {
762		char	*ap = uri->path.cp;
763		char	*bp = rcp;
764		char	a, b;
765
766		/* Compare paths */
767		while (bp < rep && ap < uri->path.ep) {
768			if ((a = *ap) == '%') {
769				/* Escaped hex multichar, convert it */
770				H2A(ap, uri->path.ep, a);
771			}
772			if ((b = *bp) == '%') {
773				/* Escaped hex multichar, convert it */
774				H2A(bp, rep, b);
775			}
776			if (a != b) {
777				/* Chars don't match */
778				goto nexturi;
779			}
780			ap++;
781			bp++;
782		}
783		if (bp != rep || ap != uri->path.ep) {
784			/* Not same length */
785			goto nexturi;
786		}
787		ap = uri->auth.cp;
788		bp = ruri->auth.cp;
789		if (ap != NULL) {
790			if (bp == NULL) {
791				/* URI has auth request URI doesn't */
792				goto nexturi;
793			}
794			while (bp < ruri->auth.ep && ap < uri->auth.ep) {
795				if ((a = *ap) == '%') {
796					/* Escaped hex multichar, convert it */
797					H2A(ap, uri->auth.ep, a);
798				}
799				if ((b = *bp) == '%') {
800					/* Escaped hex multichar, convert it */
801					H2A(bp, ruri->auth.ep, b);
802				}
803				if (a != b) {
804					/* Chars don't match */
805					goto nexturi;
806				}
807				ap++;
808				bp++;
809			}
810			if (bp != ruri->auth.ep || ap != uri->auth.ep) {
811				/* Not same length */
812				goto nexturi;
813			}
814		} else if (bp != NULL) {
815			/* URI doesn't have auth and request URI does */
816			goto nexturi;
817		}
818		/*
819		 * Have a path/auth match so before any other processing
820		 * of requested URI, check for expire or request no cache
821		 * purge.
822		 */
823		if ((uri->expire >= 0 && uri->expire <= ddi_get_lbolt()) ||
824		    ruri->nocache) {
825			/*
826			 * URI has expired or request specified to not use
827			 * the cached version, unlink the URI from the hash
828			 * chain, release all locks, release the hash ref
829			 * on the URI, and last look it up again.
830			 *
831			 * Note, this will cause all variants of the named
832			 * URI to be purged.
833			 */
834			if (puri != NULL) {
835				puri->hash = uri->hash;
836			} else {
837				hp->list = uri->hash;
838			}
839			mutex_exit(&hp->lock);
840			atomic_dec_32(&uri_hash_cnt[cur]);
841			rw_exit(&uri_hash_access);
842			if (ruri->nocache)
843				nl7c_uri_purge++;
844			else
845				nl7c_uri_expire++;
846			REF_RELE(uri);
847			goto again;
848		}
849		if (uri->scheme != NULL) {
850			/*
851			 * URI has scheme private qualifier(s), if request
852			 * URI doesn't or if no match skip this URI.
853			 */
854			if (ruri->scheme == NULL ||
855			    ! nl7c_http_cmp(uri->scheme, ruri->scheme))
856				goto nexturi;
857		} else if (ruri->scheme != NULL) {
858			/*
859			 * URI doesn't have scheme private qualifiers but
860			 * request URI does, no match, skip this URI.
861			 */
862			goto nexturi;
863		}
864		/*
865		 * Have a match, ready URI for return, first put a reference
866		 * hold on the URI, if this URI is currently being processed
867		 * then have to wait for the processing to be completed and
868		 * redo the lookup, else return it.
869		 */
870		REF_HOLD(uri);
871		mutex_enter(&uri->proclock);
872		if (uri->proc != NULL) {
873			/* The URI is being processed, wait for completion */
874			mutex_exit(&hp->lock);
875			rw_exit(&uri_hash_access);
876			if (! nonblocking &&
877			    cv_wait_sig(&uri->waiting, &uri->proclock)) {
878				/*
879				 * URI has been processed but things may
880				 * have changed while we were away so do
881				 * most everything again.
882				 */
883				mutex_exit(&uri->proclock);
884				REF_RELE(uri);
885				goto again;
886			} else {
887				/*
888				 * Either a nonblocking socket, which can't
889				 * block waiting for the processing of the
890				 * hash-hit uri to complete, or an interrupted
891				 * cv_wait_sig(); in both cases just return
892				 * lookup failure.
893				 */
894				mutex_exit(&uri->proclock);
895				REF_RELE(uri);
896				return (NULL);
897			}
898		}
899		mutex_exit(&uri->proclock);
900		uri->hit++;
901		mutex_exit(&hp->lock);
902		rw_exit(&uri_hash_access);
903		return (uri);
904	nexturi:
905		puri = uri;
906	}
907	mutex_exit(&hp->lock);
908	if (cur != new && uri_hash_ab[new] != NULL) {
909		/*
910		 * Not found in current hash and have a new hash so
911		 * check the new hash next.
912		 */
913		cur = new;
914		goto nexthash;
915	}
916add:
917	if (! add) {
918		/* Lookup only so return failure */
919		rw_exit(&uri_hash_access);
920		return (NULL);
921	}
922	/*
923	 * URI not hashed, finish initialization of the
924	 * request URI, add it to the hash, return it.
925	 */
926	ruri->hit = 0;
927	ruri->expire = -1;
928	ruri->response.sz = 0;
929	ruri->proc = (struct sonode *)~0;
930	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
931	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
932	uri_add(ruri, RW_READER, nonblocking);
933	/* uri_add() has done rw_exit(&uri_hash_access) */
934	return (ruri);
935}
936
937/*
938 * Reclaim URIs until the cached bytes drop back below the maximum
939 * cache size threshold, using a CLOCK-based reclaim modified with a
940 * per-URI history (hit) counter.
941 */
942
943static void
944nl7c_uri_reclaim(void)
945{
946	uri_hash_t	*hp, *start, *pend;
947	uri_desc_t	*uri;
948	uri_desc_t	*puri;
949	uint32_t	cur;
950	uint32_t	new;
951
952	nl7c_uri_reclaim_calls++;
953again:
954	rw_enter(&uri_hash_access, RW_WRITER);
955	cur = uri_hash_which;
956	new = cur ? 0 : 1;
957next:
958	hp = uri_hash_lru[cur];
959	start = hp;
960	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
961	while (nl7c_uri_bytes > nl7c_uri_max) {
962		puri = NULL;
963		for (uri = hp->list; uri != NULL; uri = uri->hash) {
964			if (uri->hit != 0) {
965				/*
966				 * Decrement URI activity counter and skip.
967				 */
968				uri->hit--;
969				puri = uri;
970				continue;
971			}
972			if (uri->proc != NULL) {
973				/*
974				 * Currently being processed by a socket, skip.
975				 */
976				continue;
977			}
978			/*
979			 * Found a candidate, no hit(s) since added or last
980			 * reclaim pass, unlink from its hash chain, update
981			 * lru scan pointer, drop lock, ref release it.
982			 */
983			URI_HASH_UNLINK(cur, new, hp, puri, uri);
984			if (cur == uri_hash_which) {
985				if (++hp == pend) {
986					/* Wrap pointer */
987					hp = uri_hash_ab[cur];
988				}
989				uri_hash_lru[cur] = hp;
990			}
991			rw_exit(&uri_hash_access);
992			REF_RELE(uri);
993			nl7c_uri_reclaim_cnt++;
994			goto again;
995		}
996		if (++hp == pend) {
997			/* Wrap pointer */
998			hp = uri_hash_ab[cur];
999		}
1000		if (hp == start) {
1001			if (cur != new && uri_hash_ab[new] != NULL) {
1002				/*
1003				 * Done with the current hash and have a
1004				 * new hash so check the new hash next.
1005				 */
1006				cur = new;
1007				goto next;
1008			}
1009		}
1010	}
1011	rw_exit(&uri_hash_access);
1012}
1013
1014/*
1015 * Called for a socket which is being freed prior to close, e.g. errored.
1016 */
1017
1018void
1019nl7c_urifree(struct sonode *so)
1020{
1021	sotpi_info_t *sti = SOTOTPI(so);
1022	uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1023
1024	sti->sti_nl7c_uri = NULL;
1025	if (uri->hash != URI_TEMP) {
1026		uri_delete(uri);
1027		mutex_enter(&uri->proclock);
1028		uri->proc = NULL;
1029		if (CV_HAS_WAITERS(&uri->waiting)) {
1030			cv_broadcast(&uri->waiting);
1031		}
1032		mutex_exit(&uri->proclock);
1033		nl7c_uri_free++;
1034	} else {
1035		/* No proclock as uri exclusively owned by so */
1036		uri->proc = NULL;
1037		nl7c_uri_temp_free++;
1038	}
1039	REF_RELE(uri);
1040}
1041
1042/*
1043 * ...
1044 *
1045 *	< 0	need more data
1046 *
1047 *	  0	parse complete
1048 *
1049 *	> 0	parse error
1050 */
1051
1052volatile uint64_t nl7c_resp_pfail = 0;
1053volatile uint64_t nl7c_resp_ntemp = 0;
1054volatile uint64_t nl7c_resp_pass = 0;
1055
1056static int
1057nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1058{
1059	if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1060		if (data == NULL) {
1061			/* Parse fail */
1062			goto pfail;
1063		}
1064		/* More data */
1065		data = NULL;
1066	} else if (data == NULL) {
1067		goto pass;
1068	}
1069	if (uri->hash != URI_TEMP && uri->nocache) {
1070		/*
1071		 * After response parse now no cache,
1072		 * delete it from cache, wakeup any
1073		 * waiters on this URI, make URI_TEMP.
1074		 */
1075		uri_delete(uri);
1076		mutex_enter(&uri->proclock);
1077		if (CV_HAS_WAITERS(&uri->waiting)) {
1078			cv_broadcast(&uri->waiting);
1079		}
1080		mutex_exit(&uri->proclock);
1081		uri->hash = URI_TEMP;
1082		nl7c_uri_temp_mk++;
1083	}
1084	if (data == NULL) {
1085		/* More data needed */
1086		return (-1);
1087	}
1088	/* Success */
1089	return (0);
1090
1091pfail:
1092	nl7c_resp_pfail++;
1093	return (EINVAL);
1094
1095pass:
1096	nl7c_resp_pass++;
1097	return (ENOTSUP);
1098}
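
/*
 * Caller sketch (editorial): nl7c_data() and nl7c_sendfilev() below use
 * the nl7c_resp_parse() return value roughly as
 *
 *	perror = nl7c_resp_parse(so, uri, data, sz);
 *	...
 *	if (perror == 0 && all response bytes accounted for)
 *		nl7c_close(so);
 *
 * i.e. only a completed parse (0) can close out URI processing for the
 * socket; a negative value waits for more response data and a positive
 * errno value skips the close.
 */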
1099
1100/*
1101 * Called to sink application response data. The processing of the data
1102 * is the same for a cached or temp URI (i.e. a URI for which we aren't
1103 * going to cache the response but still want to parse it to detect the
1104 * end of the response data, so that for a persistent connection we can
1105 * parse the next request).
1106 *
1107 * On return, 0 is returned for sink success, > 0 on error, and < 0 when
1108 * the so has no URI (note, in that case the data is not sinked).
1109 */
1110
1111int
1112nl7c_data(struct sonode *so, uio_t *uio)
1113{
1114	sotpi_info_t	*sti = SOTOTPI(so);
1115	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1116	iovec_t		*iov;
1117	int		cnt;
1118	int		sz = uio->uio_resid;
1119	char		*data, *alloc;
1120	char		*bp;
1121	uri_rd_t	*rdp;
1122	boolean_t	first;
1123	int		error, perror;
1124
1125	nl7c_uri_data++;
1126
1127	if (uri == NULL) {
1128		/* Socket & NL7C out of sync, disable NL7C */
1129		sti->sti_nl7c_flags = 0;
1130		nl7c_uri_NULL1++;
1131		return (-1);
1132	}
1133
1134	if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
1135		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1136		first = B_TRUE;
1137	} else {
1138		first = B_FALSE;
1139	}
1140
1141	alloc = kmem_alloc(sz, KM_SLEEP);
1142	URI_RD_ADD(uri, rdp, sz, -1);
1143
1144	if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1145		uri_delete(uri);
1146		uri->hash = URI_TEMP;
1147	}
1148	data = alloc;
1149	alloc = NULL;
1150	rdp->data.kmem = data;
1151	atomic_add_64(&nl7c_uri_bytes, sz);
1152
1153	bp = data;
1154	while (uio->uio_resid > 0) {
1155		iov = uio->uio_iov;
1156		if ((cnt = iov->iov_len) == 0) {
1157			goto next;
1158		}
1159		cnt = MIN(cnt, uio->uio_resid);
1160		error = xcopyin(iov->iov_base, bp, cnt);
1161		if (error)
1162			goto fail;
1163
1164		iov->iov_base += cnt;
1165		iov->iov_len -= cnt;
1166		uio->uio_resid -= cnt;
1167		uio->uio_loffset += cnt;
1168		bp += cnt;
1169	next:
1170		uio->uio_iov++;
1171		uio->uio_iovcnt--;
1172	}
1173
1174	/* Successful sink of data, response parse the data */
1175	perror = nl7c_resp_parse(so, uri, data, sz);
1176
1177	/* Send the data out the connection */
1178	error = uri_rd_response(so, uri, rdp, first);
1179	if (error)
1180		goto fail;
1181
1182	/* Success */
1183	if (perror == 0 &&
1184	    ((uri->respclen == URI_LEN_NOVALUE &&
1185	    uri->resplen == URI_LEN_NOVALUE) ||
1186	    uri->count >= uri->resplen)) {
1187		/*
1188		 * No more data needed and no pending response
1189		 * data or current data count >= response length
1190		 * so close the URI processing for this so.
1191		 */
1192		nl7c_close(so);
1193		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1194			/* Not a persistent connection */
1195			sti->sti_nl7c_flags = 0;
1196		}
1197	}
1198
1199	return (0);
1200
1201fail:
1202	if (alloc != NULL) {
1203		kmem_free(alloc, sz);
1204	}
1205	sti->sti_nl7c_flags = 0;
1206	nl7c_urifree(so);
1207
1208	return (error);
1209}
1210
1211/*
1212 * Called to read data from file "*fp" at offset "*off" of length "*len"
1213 * for a maximum of "max" bytes.
1214 *
1215 * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
1216 * and "*len" are updated for the actual number of bytes read, and "*ret"
1217 * is set to zero.
1218 *
1219 * Else, "NULL" is returned and "*ret" is set to the error.
1220 */
1221
1222static char *
1223nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1224{
1225	vnode_t	*vp = fp->f_vnode;
1226	int	flg = 0;
1227	size_t	size = MIN(*len, max);
1228	char	*data;
1229	int	error;
1230	uio_t	uio;
1231	iovec_t	iov;
1232
1233	(void) VOP_RWLOCK(vp, flg, NULL);
1234
1235	if (*off > MAXOFFSET_T) {
1236		VOP_RWUNLOCK(vp, flg, NULL);
1237		*ret = EFBIG;
1238		return (NULL);
1239	}
1240
1241	if (*off + size > MAXOFFSET_T)
1242		size = (ssize32_t)(MAXOFFSET_T - *off);
1243
1244	data = kmem_alloc(size, KM_SLEEP);
1245
1246	iov.iov_base = data;
1247	iov.iov_len = size;
1248	uio.uio_loffset = *off;
1249	uio.uio_iov = &iov;
1250	uio.uio_iovcnt = 1;
1251	uio.uio_resid = size;
1252	uio.uio_segflg = UIO_SYSSPACE;
1253	uio.uio_llimit = MAXOFFSET_T;
1254	uio.uio_fmode = fp->f_flag;
1255
1256	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1257	VOP_RWUNLOCK(vp, flg, NULL);
1258	*ret = error;
1259	if (error) {
1260		kmem_free(data, size);
1261		return (NULL);
1262	}
1263	*len = size;
1264	*off += size;
1265	return (data);
1266}
1267
1268/*
1269 * Called to sink application response sendfilev() data; as with nl7c_data()
1270 * above, all the data will be processed by NL7C unless there's an error.
1271 */
1272
1273int
1274nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1275    int sfvc, ssize_t *xfer)
1276{
1277	sotpi_info_t	*sti = SOTOTPI(so);
1278	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1279	file_t		*fp = NULL;
1280	vnode_t		*vp = NULL;
1281	char		*data = NULL;
1282	u_offset_t	off;
1283	int		len;
1284	int		cnt;
1285	int		total_count = 0;
1286	char		*alloc;
1287	uri_rd_t	*rdp;
1288	int		max;
1289	int		perror;
1290	int		error = 0;
1291	boolean_t	first = B_TRUE;
1292
1293	nl7c_uri_sendfilev++;
1294
1295	if (uri == NULL) {
1296		/* Socket & NL7C out of sync, disable NL7C */
1297		sti->sti_nl7c_flags = 0;
1298		nl7c_uri_NULL2++;
1299		return (0);
1300	}
1301
1302	if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
1303		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1304
1305	while (sfvc-- > 0) {
1306		/*
1307		 * off - the current sfv read file offset or user address.
1308		 *
1309		 * len - the current sfv length in bytes.
1310		 *
1311		 * cnt - number of bytes kmem_alloc()ed.
1312		 *
1313		 * alloc - the kmem_alloc()ed buffer of size "cnt".
1314		 *
1315		 * data - copy of "alloc" used for post alloc references.
1316		 *
1317		 * fp - the current sfv file_t pointer.
1318		 *
1319		 * vp - the current "*vp" vnode_t pointer.
1320		 *
1321		 * Note, for "data" and "fp" and "vp" a NULL value is used
1322		 * when not allocated such that the common failure path "fail"
1323		 * is used.
1324		 */
1325		off = sfvp->sfv_off;
1326		len = sfvp->sfv_len;
1327		cnt = len;
1328
1329		if (len == 0) {
1330			sfvp++;
1331			continue;
1332		}
1333
1334		if (sfvp->sfv_fd == SFV_FD_SELF) {
1335			/*
1336			 * User memory, copyin() all the bytes.
1337			 */
1338			alloc = kmem_alloc(cnt, KM_SLEEP);
1339			error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1340			if (error)
1341				goto fail;
1342		} else {
1343			/*
1344			 * File descriptor, prefetch some bytes.
1345			 */
1346			if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1347				error = EBADF;
1348				goto fail;
1349			}
1350			if ((fp->f_flag & FREAD) == 0) {
1351				error = EACCES;
1352				goto fail;
1353			}
1354			vp = fp->f_vnode;
1355			if (vp->v_type != VREG) {
1356				error = EINVAL;
1357				goto fail;
1358			}
1359			VN_HOLD(vp);
1360
1361			/* Read max_rem bytes from file for prefetch */
1362			if (nl7c_use_kmem) {
1363				max = cnt;
1364			} else {
1365				max = MAXBSIZE * nl7c_file_prefetch;
1366			}
1367			alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1368			if (alloc == NULL)
1369				goto fail;
1370
1371			releasef(sfvp->sfv_fd);
1372			fp = NULL;
1373		}
1374		URI_RD_ADD(uri, rdp, cnt, -1);
1375		data = alloc;
1376		alloc = NULL;
1377		rdp->data.kmem = data;
1378		total_count += cnt;
1379		if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1380			uri_delete(uri);
1381			uri->hash = URI_TEMP;
1382		}
1383
1384		/* Response parse */
1385		perror = nl7c_resp_parse(so, uri, data, len);
1386
1387		/* Send kmem data out the connection */
1388		error = uri_rd_response(so, uri, rdp, first);
1389
1390		if (error)
1391			goto fail;
1392
1393		if (sfvp->sfv_fd != SFV_FD_SELF) {
1394			/*
1395			 * File descriptor, if any bytes left save vnode_t.
1396			 */
1397			if (len > cnt) {
1398				/* More file data so add it */
1399				URI_RD_ADD(uri, rdp, len - cnt, off);
1400				rdp->data.vnode = vp;
1401
1402				/* Send vnode data out the connection */
1403				error = uri_rd_response(so, uri, rdp, first);
1404			} else {
1405				/* All file data fit in the prefetch */
1406				VN_RELE(vp);
1407			}
1408			*fileoff += len;
1409			vp = NULL;
1410		}
1411		*xfer += len;
1412		sfvp++;
1413
1414		if (first)
1415			first = B_FALSE;
1416	}
1417	if (total_count > 0) {
1418		atomic_add_64(&nl7c_uri_bytes, total_count);
1419	}
1420	if (perror == 0 &&
1421	    ((uri->respclen == URI_LEN_NOVALUE &&
1422	    uri->resplen == URI_LEN_NOVALUE) ||
1423	    uri->count >= uri->resplen)) {
1424		/*
1425		 * No more data needed and no pending response
1426		 * data or current data count >= response length
1427		 * so close the URI processing for this so.
1428		 */
1429		nl7c_close(so);
1430		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1431			/* Not a persistent connection */
1432			sti->sti_nl7c_flags = 0;
1433		}
1434	}
1435
1436	return (0);
1437
1438fail:
1439	if (error == EPIPE)
1440		tsignal(curthread, SIGPIPE);
1441
1442	if (alloc != NULL)
1443		kmem_free(alloc, cnt);
1444
1445	if (vp != NULL)
1446		VN_RELE(vp);
1447
1448	if (fp != NULL)
1449		releasef(sfvp->sfv_fd);
1450
1451	if (total_count > 0) {
1452		atomic_add_64(&nl7c_uri_bytes, total_count);
1453	}
1454
1455	sti->sti_nl7c_flags = 0;
1456	nl7c_urifree(so);
1457
1458	return (error);
1459}
1460
1461/*
1462 * Called for a socket which is closing or when an application has
1463 * completed sending all the response data (i.e. for a persistent
1464 * connection called once for each completed application response).
1465 */
1466
1467void
1468nl7c_close(struct sonode *so)
1469{
1470	sotpi_info_t	*sti = SOTOTPI(so);
1471	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1472
1473	if (uri == NULL) {
1474		/*
1475		 * No URI being processed so might be a listen()er
1476		 * if so do any cleanup, else nothing more to do.
1477		 */
1478		if (so->so_state & SS_ACCEPTCONN) {
1479			(void) nl7c_close_addr(so);
1480		}
1481		return;
1482	}
1483	sti->sti_nl7c_uri = NULL;
1484	if (uri->hash != URI_TEMP) {
1485		mutex_enter(&uri->proclock);
1486		uri->proc = NULL;
1487		if (CV_HAS_WAITERS(&uri->waiting)) {
1488			cv_broadcast(&uri->waiting);
1489		}
1490		mutex_exit(&uri->proclock);
1491		nl7c_uri_close++;
1492	} else {
1493		/* No proclock as uri exclusively owned by so */
1494		uri->proc = NULL;
1495		nl7c_uri_temp_close++;
1496	}
1497	REF_RELE(uri);
1498	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1499		nl7c_uri_reclaim();
1500	}
1501}
1502
1503/*
1504 * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1505 * release the segmap mapping. Note, the uri_segmap_t will be freed by
1506 * REF_RELE() on return.
1507 */
1508
1509void
1510uri_segmap_inactive(uri_segmap_t *smp)
1511{
1512	if (!segmap_kpm) {
1513		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
1514		    smp->len, F_SOFTUNLOCK, S_OTHER);
1515	}
1516	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1517	VN_RELE(smp->vp);
1518}
1519
1520/*
1521 * The call-back for desballoc()ed mblk_t's: if a segmap mapped mblk_t,
1522 * release the segmap reference (one hold per desballoc() of a segmap page);
1523 * in all cases release the uri_desc_t reference (one hold per desballoc())
1524 * and last kmem free the uri_desb_t.
1525 */
1526
1527static void
1528uri_desb_free(uri_desb_t *desb)
1529{
1530	if (desb->segmap != NULL) {
1531		REF_RELE(desb->segmap);
1532	}
1533	REF_RELE(desb->uri);
1534	kmem_cache_free(uri_desb_kmc, desb);
1535}
1536
1537/*
1538 * Segmap map up to a page of a uri_rd_t file descriptor.
1539 */
1540
1541uri_segmap_t *
1542uri_segmap_map(uri_rd_t *rdp, int bytes)
1543{
1544	uri_segmap_t	*segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1545	int		len = MIN(rdp->sz, MAXBSIZE);
1546
1547	if (len > bytes)
1548		len = bytes;
1549
1550	REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1551	segmap->len = len;
1552	VN_HOLD(rdp->data.vnode);
1553	segmap->vp = rdp->data.vnode;
1554
1555	segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1556	    segmap_kpm ? SM_FAULT : 0, S_READ);
1557
1558	if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1559	    F_SOFTLOCK, S_READ) != 0) {
1560		REF_RELE(segmap);
1561		return (NULL);
1562	}
1563	return (segmap);
1564}
1565
1566/*
1567 * Chop up the kernel virtual memory area *data of size *sz bytes for
1568 * a maximum of *bytes bytes into a desballoc()ed mblk_t chain using
1569 * the given template uri_desb_t *temp of max_mblk bytes per mblk_t.
1570 *
1571 * The values of *data, *sz, and *bytes are updated on return and the
1572 * mblk_t chain is returned.
1573 */
1574
1575static mblk_t *
1576uri_desb_chop(char **data, size_t *sz, int *bytes, uri_desb_t *temp,
1577    int max_mblk, char *eoh, mblk_t *persist)
1578{
1579	char		*ldata = *data;
1580	size_t		lsz = *sz;
1581	int		lbytes = bytes ? *bytes : lsz;
1582	uri_desb_t	*desb;
1583	mblk_t		*mp = NULL;
1584	mblk_t		*nmp, *pmp = NULL;
1585	int		msz;
1586
1587	if (lbytes == 0 && lsz == 0)
1588		return (NULL);
1589
1590	while (lbytes > 0 && lsz > 0) {
1591		msz = MIN(lbytes, max_mblk);
1592		msz = MIN(msz, lsz);
1593		if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1594			msz = (eoh - ldata);
1595			pmp = persist;
1596			persist = NULL;
1597			if (msz == 0) {
1598				nmp = pmp;
1599				pmp = NULL;
1600				goto zero;
1601			}
1602		}
1603		desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1604		REF_HOLD(temp->uri);
1605		if (temp->segmap) {
1606			REF_HOLD(temp->segmap);
1607		}
1608		bcopy(temp, desb, sizeof (*desb));
1609		desb->frtn.free_arg = (caddr_t)desb;
1610		nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1611		if (nmp == NULL) {
1612			if (temp->segmap) {
1613				REF_RELE(temp->segmap);
1614			}
1615			REF_RELE(temp->uri);
1616			if (mp != NULL) {
1617				mp->b_next = NULL;
1618				freemsg(mp);
1619			}
1620			if (persist != NULL) {
1621				freeb(persist);
1622			}
1623			return (NULL);
1624		}
1625		nmp->b_wptr += msz;
1626	zero:
1627		if (mp != NULL) {
1628			mp->b_next->b_cont = nmp;
1629		} else {
1630			mp = nmp;
1631		}
1632		if (pmp != NULL) {
1633			nmp->b_cont = pmp;
1634			nmp = pmp;
1635			pmp = NULL;
1636		}
1637		mp->b_next = nmp;
1638		ldata += msz;
1639		lsz -= msz;
1640		lbytes -= msz;
1641	}
1642	*data = ldata;
1643	*sz = lsz;
1644	if (bytes)
1645		*bytes = lbytes;
1646	return (mp);
1647}
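
/*
 * Worked example (editorial): chopping a kmem response buffer with
 * max_mblk of 1460, a byte budget of 4096, and no persist mblk,
 *
 *	char	*data = rd.data.kmem;
 *	size_t	sz = 10000;
 *	int	bytes = 4096;
 *
 *	mp = uri_desb_chop(&data, &sz, &bytes, &desb, 1460, NULL, NULL);
 *
 * returns a chain of three mblk_t's of 1460, 1460, and 1176 bytes, with
 * data advanced by 4096, sz reduced to 5904, and bytes reduced to 0;
 * each mblk_t carries a REF_HOLD() on desb.uri that is released by
 * uri_desb_free() when the mblk_t is freed.
 */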
1648
1649/*
1650 * Experimental noqwait (i.e. no canput()/qwait() checks), just send
1651 * the entire mblk_t chain down without flow-control checks.
1652 */
1653
1654static int
1655kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1656{
1657	struct stdata *stp;
1658	int error = 0;
1659
1660	ASSERT(vp->v_stream);
1661	stp = vp->v_stream;
1662
1663	/* Fast check of flags before acquiring the lock */
1664	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1665		mutex_enter(&stp->sd_lock);
1666		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1667		mutex_exit(&stp->sd_lock);
1668		if (error != 0) {
1669			if (!(stp->sd_flag & STPLEX) &&
1670			    (stp->sd_wput_opt & SW_SIGPIPE)) {
1671				error = EPIPE;
1672			}
1673			return (error);
1674		}
1675	}
1676	putnext(stp->sd_wrq, mp);
1677	return (0);
1678}
1679
1680/*
1681 * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1682 */
1683
1684static int
1685uri_rd_response(struct sonode *so,
1686    uri_desc_t *uri,
1687    uri_rd_t *rdp,
1688    boolean_t first)
1689{
1690	vnode_t		*vp = SOTOV(so);
1691	int		max_mblk = (int)vp->v_stream->sd_maxblk;
1692	int		wsz;
1693	mblk_t		*mp, *wmp, *persist;
1694	int		write_bytes;
1695	uri_rd_t	rd;
1696	uri_desb_t	desb;
1697	uri_segmap_t	*segmap = NULL;
1698	char		*segmap_data;
1699	size_t		segmap_sz;
1700	int		error;
1701	int		fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1702	    ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1703
1704
1705	/* Initialize template uri_desb_t */
1706	desb.frtn.free_func = uri_desb_free;
1707	desb.frtn.free_arg = NULL;
1708	desb.uri = uri;
1709
1710	/* Get a local copy of the rd_t */
1711	bcopy(rdp, &rd, sizeof (rd));
1712	do {
1713		if (first) {
1714			/*
1715			 * For the first kstrwrite() send enough data to
1716			 * get things going, note the nonblocking version
1717			 * of kstrwrite() will be used below.
1718			 */
1719			write_bytes = P2ROUNDUP((max_mblk * 4),
1720			    MAXBSIZE * nl7c_file_prefetch);
1721		} else {
1722			if ((write_bytes = so->so_sndbuf) == 0)
1723				write_bytes = vp->v_stream->sd_qn_maxpsz;
1724			ASSERT(write_bytes > 0);
1725			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1726		}
1727		/*
1728		 * Chop up to a write_bytes worth of data.
1729		 */
1730		wmp = NULL;
1731		wsz = write_bytes;
1732		do {
1733			if (rd.sz == 0)
1734				break;
1735			if (rd.off == -1) {
1736				if (uri->eoh >= rd.data.kmem &&
1737				    uri->eoh < &rd.data.kmem[rd.sz]) {
1738					persist = nl7c_http_persist(so);
1739				} else {
1740					persist = NULL;
1741				}
1742				desb.segmap = NULL;
1743				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1744				    &wsz, &desb, max_mblk, uri->eoh, persist);
1745				if (mp == NULL) {
1746					error = ENOMEM;
1747					goto invalidate;
1748				}
1749			} else {
1750				if (segmap == NULL) {
1751					segmap = uri_segmap_map(&rd,
1752					    write_bytes);
1753					if (segmap == NULL) {
1754						error = ENOMEM;
1755						goto invalidate;
1756					}
1757					desb.segmap = segmap;
1758					segmap_data = segmap->base;
1759					segmap_sz = segmap->len;
1760				}
1761				mp = uri_desb_chop(&segmap_data, &segmap_sz,
1762				    &wsz, &desb, max_mblk, NULL, NULL);
1763				if (mp == NULL) {
1764					error = ENOMEM;
1765					goto invalidate;
1766				}
1767				if (segmap_sz == 0) {
1768					rd.sz -= segmap->len;
1769					rd.off += segmap->len;
1770					REF_RELE(segmap);
1771					segmap = NULL;
1772				}
1773			}
1774			if (wmp == NULL) {
1775				wmp = mp;
1776			} else {
1777				wmp->b_next->b_cont = mp;
1778				wmp->b_next = mp->b_next;
1779				mp->b_next = NULL;
1780			}
1781		} while (wsz > 0 && rd.sz > 0);
1782
1783		wmp->b_next = NULL;
1784		if (first) {
1785			/* First kstrwrite(), use noqwait */
1786			if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1787				goto invalidate;
1788			/*
1789			 * For the rest of the kstrwrite()s use SO_SNDBUF
1790			 * worth of data at a time, note these kstrwrite()s
1791			 * may (will) block one or more times.
1792			 */
1793			first = B_FALSE;
1794		} else {
1795			if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1796				if (error == EAGAIN) {
1797					nl7c_uri_rd_EAGAIN++;
1798					if ((error =
1799					    kstrwritempnoqwait(vp, wmp)) != 0)
1800						goto invalidate;
1801				} else
1802					goto invalidate;
1803			}
1804		}
1805	} while (rd.sz > 0);
1806
1807	return (0);
1808
1809invalidate:
1810	if (segmap) {
1811		REF_RELE(segmap);
1812	}
1813	if (wmp)
1814		freemsg(wmp);
1815
1816	return (error);
1817}
1818
1819/*
1820 * Send the URI uri_desc_t *uri response out the socket_t *so.
1821 */
1822
1823static int
1824uri_response(struct sonode *so, uri_desc_t *uri)
1825{
1826	uri_rd_t	*rdp = &uri->response;
1827	boolean_t	first = B_TRUE;
1828	int		error;
1829
1830	while (rdp != NULL) {
1831		error = uri_rd_response(so, uri, rdp, first);
1832		if (error != 0) {
1833			goto invalidate;
1834		}
1835		first = B_FALSE;
1836		rdp = rdp->next;
1837	}
1838	return (0);
1839
1840invalidate:
1841	if (uri->hash != URI_TEMP)
1842		uri_delete(uri);
1843	return (error);
1844}
1845
1846/*
1847 * The pchars[] array is indexed by a char to determine if it's a
1848 * valid URI path component character where:
1849 *
1850 *    pchar       = unreserved | escaped |
1851 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
1852 *
1853 *    unreserved  = alphanum | mark
1854 *
1855 *    alphanum    = alpha | digit
1856 *
1857 *    alpha       = lowalpha | upalpha
1858 *
1859 *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
1860 *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
1861 *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
1862 *                  "y" | "z"
1863 *
1864 *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
1865 *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
1866 *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
1867 *                  "Y" | "Z"
1868 *
1869 *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
1870 *                  "8" | "9"
1871 *
1872 *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
1873 *
1874 *    escaped     = "%" hex hex
1875 *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
1876 *                  "a" | "b" | "c" | "d" | "e" | "f"
1877 */
1878
1879static char pchars[] = {
1880    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
1881    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
1882    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
1883    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
1884    0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
1885    0, 0, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
1886    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
1887    1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
1888    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
1889    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
1890    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
1891    1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
1892    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
1893    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
1894    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
1895    1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
1896};
1897
1898#define	PCHARS_MASK 0x7F
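
/*
 * Presumed usage (editorial assumption; the consumer is the scheme
 * parser, e.g. nl7chttp.c): an octet c is a valid path component
 * character iff
 *
 *	pchars[c & PCHARS_MASK] != 0
 *
 * e.g. 'a' (0x61) and '/' (0x2F) map to 1 while ' ' (0x20) and '"'
 * (0x22) map to 0.
 */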
1899
1900/*
1901 * This is the main L7 request message parse, we are called each time
1902 * new data is available for a socket, each time a single buffer of the
1903 * entire message to date is given.
1904 *
1905 * Here we parse the request looking for the URI, parse it, and if a
1906 * supported scheme call the scheme parser to complete the parse of any
1907 * headers which may further qualify the identity of the requested object,
1908 * then look it up in the URI hash.
1909 *
1910 * Return B_TRUE for more processing.
1911 *
1912 * Note, at this time the parser supports the generic message format as
1913 * specified in RFC 822 with potential limitations as specified in RFC
1914 * 2616 for HTTP messages.
1915 *
1916 * Note, the caller supports an mblk_t chain, for now the parser(s)
1917 * require the complete header in a single mblk_t. This is the common
1918 * case and certainly for high performance environments; if at a future
1919 * date mblk_t chains become important the parser can be revised to process
1920 * mblk_t chains.
1921 */
1922
1923boolean_t
1924nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1925{
1926	sotpi_info_t *sti = SOTOTPI(so);
1927	char	*cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
1928	char	*ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
1929	char	*get = "GET ";
1930	char	*post = "POST ";
1931	char	c;
1932	char	*uris;
1933	uri_desc_t *uri = NULL;
1934	uri_desc_t *ruri = NULL;
1935	mblk_t	*reqmp;
1936	uint32_t hv = 0;
1937
1938	if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
1939		nl7c_uri_pass_dupbfail++;
1940		goto pass;
1941	}
1942	/*
1943	 * Allocate and initialize minimal state for the request
1944	 * uri_desc_t, in the cache hit case this uri_desc_t will
1945	 * be freed.
1946	 */
1947	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1948	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1949	uri->hash = NULL;
1950	uri->tail = NULL;
1951	uri->scheme = NULL;
1952	uri->count = 0;
1953	uri->reqmp = reqmp;
1954
1955	/*
1956	 * Set request time to current time.
1957	 */
1958	sti->sti_nl7c_rtime = gethrestime_sec();
1959
1960	/*
1961	 * Parse the Request-Line for the URI.
1962	 *
1963	 * For backwards HTTP version compatibility reasons skip any leading
1964	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
1965	 */
1966	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1967		cp++;
1968	}
1969	while (cp < ep && *get == *cp) {
1970		get++;
1971		cp++;
1972	}
1973	if (*get != 0) {
1974		/* Not a "GET", check for "POST" */
1975		while (cp < ep && *post == *cp) {
1976			post++;
1977			cp++;
1978		}
1979		if (*post != 0) {
1980			if (cp == ep) {
1981				nl7c_uri_more_get++;
1982				goto more;
1983			}
1984			/* Not a "GET" or a "POST", just pass */
1985			nl7c_uri_pass_method++;
1986			goto pass;
1987		}
1988		/* "POST", don't cache but still may want to parse */
1989		uri->hash = URI_TEMP;
1990	}
1991	/*
1992	 * Skip over URI path char(s) and save start and past end pointers.
1993	 */
1994	uris = cp;
1995	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
1996		if (c == '?') {
1997			/* Don't cache but still may want to parse */
1998			uri->hash = URI_TEMP;
1999		}
2000		CHASH(hv, c);
2001		cp++;
2002	}
2003	if (c != '\r' && cp == ep) {
2004		nl7c_uri_more_eol++;
2005		goto more;
2006	}
2007	/*
2008	 * Request-Line URI parsed, pass the rest of the request on
2009	 * to the http scheme parse.
2010	 */
2011	uri->path.cp = uris;
2012	uri->path.ep = cp;
2013	uri->hvalue = hv;
2014	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2015		/*
2016		 * Parse not successful or pass on this request; the parse
2017		 * pointer "cp" is overloaded such that non-NULL means more
2018		 * data is needed and NULL means a bad request parse or pass.
2019		 */
2020		if (cp != NULL) {
2021			nl7c_uri_more_http++;
2022			goto more;
2023		}
2024		nl7c_uri_pass_http++;
2025		goto pass;
2026	}
2027	if (uri->nocache) {
2028		uri->hash = URI_TEMP;
2029		(void) uri_lookup(uri, B_FALSE, nonblocking);
2030	} else if (uri->hash == URI_TEMP) {
2031		uri->nocache = B_TRUE;
2032		(void) uri_lookup(uri, B_FALSE, nonblocking);
2033	}
2034
2035	if (uri->hash == URI_TEMP) {
2036		if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
2037			/* Temporary URI so skip hash processing */
2038			nl7c_uri_request++;
2039			nl7c_uri_temp++;
2040			goto temp;
2041		}
2042		/* Not persistent so not interested in the response */
2043		nl7c_uri_pass_temp++;
2044		goto pass;
2045	}
2046	/*
2047	 * Check the URI hash for a cached response, save the request
2048	 * uri in case we need it below.
2049	 */
2050	ruri = uri;
2051	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2052		/*
2053		 * Failed to lookup due to nonblocking wait required,
2054		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2055		 * failure, ... Just pass on this request.
2056		 */
2057		nl7c_uri_pass_addfail++;
2058		goto pass;
2059	}
2060	nl7c_uri_request++;
2061	if (uri->response.sz > 0) {
2062		/*
2063		 * We have the response cached, update recv mblk rptr
2064		 * to reflect the data consumed in parse.
2065		 */
2066		mblk_t	*mp = sti->sti_nl7c_rcv_mp;
2067
2068		if (cp == (char *)mp->b_wptr) {
2069			sti->sti_nl7c_rcv_mp = mp->b_cont;
2070			mp->b_cont = NULL;
2071			freeb(mp);
2072		} else {
2073			mp->b_rptr = (unsigned char *)cp;
2074		}
2075		nl7c_uri_hit++;
2076		/* If logging enabled log request */
2077		if (nl7c_logd_enabled) {
2078			ipaddr_t faddr;
2079
2080			if (so->so_family == AF_INET) {
2081				/* Only support IPv4 addrs */
2082				faddr = ((struct sockaddr_in *)
2083				    sti->sti_faddr_sa) ->sin_addr.s_addr;
2084			} else {
2085				faddr = 0;
2086			}
2087			/* XXX need to pass response type, e.g. 200, 304 */
2088			nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
2089		}
2090
2091		/* If conditional request check for substitute response */
2092		if (ruri->conditional) {
2093			uri = nl7c_http_cond(ruri, uri);
2094		}
2095
2096		/*
2097		 * Release reference on request URI, send the response out
2098		 * the socket, release reference on response uri, set the
2099		 * *ret value to B_TRUE to indicate request was consumed
2100		 * then return B_FALSE to indicate no more data needed.
2101		 */
2102		REF_RELE(ruri);
2103		(void) uri_response(so, uri);
2104		REF_RELE(uri);
2105		*ret = B_TRUE;
2106		return (B_FALSE);
2107	}
2108	/*
2109	 * Missed the cache, the request URI is now in the cache waiting for
2110	 * application write-side data to fill it.
2111	 */
2112	nl7c_uri_miss++;
2113temp:
2114	/*
2115	 * A miss or temp URI for which response data is needed, link
2116	 * uri to so and so to uri, set WAITWRITE in the so such that
2117	 * read-side processing is suspended (so the next read() gets
2118	 * the request data) until a write() is processed by NL7C.
2119	 *
2120	 * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
2121	 */
2122	uri->proc = so;
2123	sti->sti_nl7c_uri = uri;
2124	sti->sti_nl7c_flags |= NL7C_WAITWRITE;
2125	*ret = B_FALSE;
2126	return (B_FALSE);
2127
2128more:
2129	/* More data is needed, note fragmented recv not supported */
2130	nl7c_uri_more++;
2131
2132pass:
2133	/* Pass on this request */
2134	nl7c_uri_pass++;
2135	nl7c_uri_request++;
2136	if (ruri != NULL) {
2137		REF_RELE(ruri);
2138	}
2139	if (uri) {
2140		REF_RELE(uri);
2141	}
2142	sti->sti_nl7c_flags = 0;
2143	*ret = B_FALSE;
2144	return (B_FALSE);
2145}
2146