1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <sys/strsubr.h>
26#include <sys/strsun.h>
27#include <sys/param.h>
28#include <sys/sysmacros.h>
29#include <vm/seg_map.h>
30#include <vm/seg_kpm.h>
31#include <sys/condvar_impl.h>
32#include <sys/sendfile.h>
33#include <fs/sockfs/nl7c.h>
34#include <fs/sockfs/nl7curi.h>
35#include <fs/sockfs/socktpi_impl.h>
36
37#include <inet/common.h>
38#include <inet/ip.h>
39#include <inet/ip6.h>
40#include <inet/tcp.h>
41#include <inet/led.h>
42#include <inet/mi.h>
43
44#include <inet/nca/ncadoorhdr.h>
45#include <inet/nca/ncalogd.h>
46#include <inet/nca/ncandd.h>
47
48#include <sys/promif.h>
49
50/*
51 * Some externs:
52 */
53
54extern boolean_t	nl7c_logd_enabled;
55extern void		nl7c_logd_log(uri_desc_t *, uri_desc_t *,
56			    time_t, ipaddr_t);
57extern boolean_t	nl7c_close_addr(struct sonode *);
58extern struct sonode	*nl7c_addr2portso(void *);
59extern uri_desc_t	*nl7c_http_cond(uri_desc_t *, uri_desc_t *);
60
61/*
62 * Various global tuneables:
63 */
64
65clock_t		nl7c_uri_ttl = -1;	/* TTL in seconds (-1 == infinite) */
66
67boolean_t	nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */
68
69uint64_t	nl7c_file_prefetch = 1; /* File cache prefetch pages */
70
71uint64_t	nl7c_uri_max = 0;	/* Maximum bytes (0 == infinite) */
72uint64_t	nl7c_uri_bytes = 0;	/* Bytes of kmem used by URIs */
73
74/*
75 * Locals:
76 */
77
78static int	uri_rd_response(struct sonode *, uri_desc_t *,
79		    uri_rd_t *, boolean_t);
80static int	uri_response(struct sonode *, uri_desc_t *);
81
82/*
83 * HTTP scheme functions called from nl7chttp.c:
84 */
85
86boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
87boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
88boolean_t nl7c_http_cmp(void *, void *);
89mblk_t *nl7c_http_persist(struct sonode *);
90void nl7c_http_free(void *arg);
91void nl7c_http_init(void);
92
93/*
94 * Counters that need to move to kstat and/or be removed:
95 */
96
97volatile uint64_t nl7c_uri_request = 0;
98volatile uint64_t nl7c_uri_hit = 0;
99volatile uint64_t nl7c_uri_pass = 0;
100volatile uint64_t nl7c_uri_miss = 0;
101volatile uint64_t nl7c_uri_temp = 0;
102volatile uint64_t nl7c_uri_more = 0;
103volatile uint64_t nl7c_uri_data = 0;
104volatile uint64_t nl7c_uri_sendfilev = 0;
105volatile uint64_t nl7c_uri_reclaim_calls = 0;
106volatile uint64_t nl7c_uri_reclaim_cnt = 0;
107volatile uint64_t nl7c_uri_pass_urifail = 0;
108volatile uint64_t nl7c_uri_pass_dupbfail = 0;
109volatile uint64_t nl7c_uri_more_get = 0;
110volatile uint64_t nl7c_uri_pass_method = 0;
111volatile uint64_t nl7c_uri_pass_option = 0;
112volatile uint64_t nl7c_uri_more_eol = 0;
113volatile uint64_t nl7c_uri_more_http = 0;
114volatile uint64_t nl7c_uri_pass_http = 0;
115volatile uint64_t nl7c_uri_pass_addfail = 0;
116volatile uint64_t nl7c_uri_pass_temp = 0;
117volatile uint64_t nl7c_uri_expire = 0;
118volatile uint64_t nl7c_uri_purge = 0;
119volatile uint64_t nl7c_uri_NULL1 = 0;
120volatile uint64_t nl7c_uri_NULL2 = 0;
121volatile uint64_t nl7c_uri_close = 0;
122volatile uint64_t nl7c_uri_temp_close = 0;
123volatile uint64_t nl7c_uri_free = 0;
124volatile uint64_t nl7c_uri_temp_free = 0;
125volatile uint64_t nl7c_uri_temp_mk = 0;
126volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
127
128/*
129 * Various kmem_cache_t's:
130 */
131
132kmem_cache_t *nl7c_uri_kmc;
133kmem_cache_t *nl7c_uri_rd_kmc;
134static kmem_cache_t *uri_desb_kmc;
135static kmem_cache_t *uri_segmap_kmc;
136
137static void uri_kmc_reclaim(void *);
138
139static void nl7c_uri_reclaim(void);
140
141/*
 * The URI hash is a dynamically sized A/B bucket hash; when the current
 * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
 * the next P2Ps[] size is created.
 *
 * All lookups are done in the current hash then the new hash (if any).
 * If there is a new hash, then whenever a current hash bucket chain is
 * examined any uri_desc_t members found are migrated to the new hash.
 * Once the last uri_desc_t has been migrated the new hash becomes the
 * current hash and the previous current hash is freed, leaving a single
 * hash.
152 *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * arrays. A bucket may be accessed only after acquiring the uri_hash_access
 * lock (for READER or WRITER) and then the bucket's uri_hash_t.lock, which
 * protects the uri_hash_t and all linked uri_desc_t.hash members. Note, a
 * REF_HOLD() is placed on all uri_desc_t uri_hash_t list members.
158 *
159 * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
160 * access and WRITER for write access. Note, WRITER is only required for
161 * hash geometry changes.
162 *
163 * uri_hash_which - which uri_hash_ab[] is the current hash.
164 *
165 * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
166 *
167 * uri_hash_sz[] - the size for each uri_hash_ab[].
168 *
169 * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
170 *
171 * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
172 * a new uri_hash_ab[] needs to be created.
173 *
174 * uri_hash_ab[] - the uri_hash_t entries.
175 *
176 * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
177 */
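
/*
 * A minimal sketch (kept in a comment, not compiled) of the lookup order
 * the above implies; the helper name is hypothetical and, unlike the real
 * uri_lookup() below, it compares only hvalue rather than the full path:
 *
 *	uri_desc_t *
 *	example_find(uint32_t hvalue)
 *	{
 *		uint32_t	cur, new, hix;
 *		uri_desc_t	*uri = NULL;
 *
 *		rw_enter(&uri_hash_access, RW_READER);
 *		cur = uri_hash_which;
 *		new = cur ? 0 : 1;
 *		for (;;) {
 *			hix = hvalue % uri_hash_sz[cur];
 *			mutex_enter(&uri_hash_ab[cur][hix].lock);
 *			for (uri = uri_hash_ab[cur][hix].list; uri != NULL;
 *			    uri = uri->hash) {
 *				if (uri->hvalue == hvalue)
 *					break;
 *			}
 *			mutex_exit(&uri_hash_ab[cur][hix].lock);
 *			if (uri != NULL || cur == new ||
 *			    uri_hash_ab[new] == NULL)
 *				break;
 *			cur = new;
 *		}
 *		rw_exit(&uri_hash_access);
 *		return (uri);
 *	}
 */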
178
179typedef struct uri_hash_s {
180	struct uri_desc_s	*list;		/* List of uri_t(s) */
181	kmutex_t		lock;
182} uri_hash_t;
183
184#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
185#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */
186
187static krwlock_t	uri_hash_access;
188static uint32_t		uri_hash_which = 0;
189static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0};
190static uint32_t		uri_hash_sz[2] = {0, 0};
191static uint32_t		uri_hash_cnt[2] = {0, 0};
192static uint32_t		uri_hash_overflow[2] = {0, 0};
193static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};
194static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL};
195
196/*
 * Primes for N of 3 - 24 where P is the first prime less than
 * (2^(N-1))+(2^(N-2)); these primes have been found to be useful
 * for prime-sized hash tables.
199 */
200
201static const int P2Ps[] = {
202	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
203	6143, 12281, 24571, 49139, 98299, 196597, 393209,
204	786431, 1572853, 3145721, 6291449, 12582893, 0};
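
/*
 * A throwaway user-space sketch of the rule stated above, handy for sanity
 * checking the table; is_prime() is written here only for illustration and
 * is not part of this file:
 *
 *	#include <stdio.h>
 *
 *	static int
 *	is_prime(int v)
 *	{
 *		int d;
 *
 *		for (d = 2; d * d <= v; d++)
 *			if (v % d == 0)
 *				return (0);
 *		return (v > 1);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		int n, p;
 *
 *		for (n = 3; n <= 24; n++) {
 *			p = (1 << (n - 1)) + (1 << (n - 2));
 *			while (!is_prime(--p))
 *				;
 *			printf("%d: %d\n", n, p);
 *		}
 *		return (0);
 *	}
 */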
205
206/*
207 * Hash macros:
208 *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return it in c; *ep points past the end of the string and on return
 *    *cp will point to the last char consumed.
213 *
 *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
 *    *cp to *ep into the unsigned hix; neither cp nor ep is modified.
216 *
217 *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
218 *    a hash index 0 - (uri_hash_sz[which] - 1).
219 *
220 *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
221 *    uri_desc_t members from hash from to hash to.
222 *
 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
 *    *uri which is a member of the uri_hash_t *hp list with a previous
 *    list member of *puri for the uri_hash_ab[] cur. After unlinking,
 *    check whether the cur hash is now empty and, if so, make new the
 *    cur. Note, as this macro can change a hash chain it needs to be
 *    run with uri_hash_access held as RW_WRITER; further, as it can
 *    promote the new hash to cur, any subsequent access to the hash
 *    state must either drop the locks and start over or make sure the
 *    global state is as consistent after the call as it was before.
232 */
233
234#define	H2A(cp, ep, c) {						\
235	int	_h = 2;							\
236	int	_n = 0;							\
237	char	_hc;							\
238									\
239	while (_h > 0 && ++(cp) < (ep)) {				\
240		if (_h == 1)						\
241			_n *= 0x10;					\
242		_hc = *(cp);						\
243		if (_hc >= '0' && _hc <= '9')				\
244			_n += _hc - '0';				\
		else if (_hc >= 'a' && _hc <= 'f')			\
			_n += _hc - 'W';				\
		else if (_hc >= 'A' && _hc <= 'F')			\
			_n += _hc - '7';				\
249		_h--;							\
250	}								\
251	(c) = _n;							\
252}
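
/*
 * For example, applied to the escaped octet "%2F" (with cp pointing at the
 * '%' as URI_HASH() below does) H2A() consumes the two hex chars and yields
 * 0x2F, i.e. '/':
 *
 *	char	*cp = "%2F";
 *	char	*ep = cp + 3;
 *	char	c;
 *
 *	H2A(cp, ep, c);		c == 0x2F, cp now points at the 'F'
 */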
253
254#define	URI_HASH(hv, cp, ep) {						\
255	char	*_s = (cp);						\
256	char	_c;							\
257									\
258	while (_s < (ep)) {						\
259		if ((_c = *_s) == '%') {				\
260			H2A(_s, (ep), _c);				\
261		}							\
262		CHASH(hv, _c);						\
263		_s++;							\
264	}								\
265}
266
267#define	URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)])
268
269#define	URI_HASH_MIGRATE(from, hp, to) {				\
270	uri_desc_t	*_nuri;						\
271	uint32_t	_nhix;						\
272	uri_hash_t	*_nhp;						\
273									\
274	mutex_enter(&(hp)->lock);					\
275	while ((_nuri = (hp)->list) != NULL) {				\
276		(hp)->list = _nuri->hash;				\
277		atomic_dec_32(&uri_hash_cnt[(from)]);		\
278		atomic_inc_32(&uri_hash_cnt[(to)]);			\
279		_nhix = _nuri->hvalue;					\
280		URI_HASH_IX(_nhix, to);					\
281		_nhp = &uri_hash_ab[(to)][_nhix];			\
282		mutex_enter(&_nhp->lock);				\
283		_nuri->hash = _nhp->list;				\
284		_nhp->list = _nuri;					\
285		_nuri->hit = 0;						\
286		mutex_exit(&_nhp->lock);				\
287	}								\
288	mutex_exit(&(hp)->lock);					\
289}
290
291#define	URI_HASH_UNLINK(cur, new, hp, puri, uri) {			\
292	if ((puri) != NULL) {						\
293		(puri)->hash = (uri)->hash;				\
294	} else {							\
295		(hp)->list = (uri)->hash;				\
296	}								\
297	if (atomic_dec_32_nv(&uri_hash_cnt[(cur)]) == 0 &&		\
298	    uri_hash_ab[(new)] != NULL) {				\
299		kmem_free(uri_hash_ab[cur],				\
300		    sizeof (uri_hash_t) * uri_hash_sz[cur]);		\
301		uri_hash_ab[(cur)] = NULL;				\
302		uri_hash_lru[(cur)] = NULL;				\
303		uri_hash_which = (new);					\
304	} else {							\
305		uri_hash_lru[(cur)] = (hp);				\
306	}								\
307}
308
309void
310nl7c_uri_init(void)
311{
312	uint32_t	cur = uri_hash_which;
313
314	rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
315
316	uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
317	uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
318	uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
319	    KM_SLEEP);
320	uri_hash_lru[cur] = uri_hash_ab[cur];
321
322	nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
323	    0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
324
325	nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
326	    sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
327
328	uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
329	    sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
330
331	uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
332	    sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
333
334	nl7c_http_init();
335}
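
/*
 * Worked example of the initial geometry set up above: with URI_HASH_N_INIT
 * of 9 the first hash has P2Ps[9] = 383 buckets and an overflow threshold
 * of 383 * URI_HASH_AVRG = 1915 cached URIs; once uri_hash_cnt[] reaches
 * that, uri_add() below allocates the next size, P2Ps[10] = 761 buckets
 * (overflow 3805), and starts migrating entries from the old hash.
 */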
336
337#define	CV_SZ	16
338
339void
340nl7c_mi_report_hash(mblk_t *mp)
341{
342	uri_hash_t	*hp, *pend;
343	uri_desc_t	*uri;
344	uint32_t	cur;
345	uint32_t	new;
346	int		n, nz, tot;
347	uint32_t	cv[CV_SZ + 1];
348
349	rw_enter(&uri_hash_access, RW_READER);
350	cur = uri_hash_which;
351	new = cur ? 0 : 1;
352next:
353	for (n = 0; n <= CV_SZ; n++)
354		cv[n] = 0;
355	nz = 0;
356	tot = 0;
357	hp = &uri_hash_ab[cur][0];
358	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
359	while (hp < pend) {
360		n = 0;
361		for (uri = hp->list; uri != NULL; uri = uri->hash) {
362			n++;
363		}
364		tot += n;
365		if (n > 0)
366			nz++;
367		if (n > CV_SZ)
368			n = CV_SZ;
369		cv[n]++;
370		hp++;
371	}
372
373	(void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
374	    "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
375	    uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
376	(void) mi_mpprintf(mp, "Free=%d", cv[0]);
377	for (n = 1; n < CV_SZ; n++) {
378		int	pn = 0;
379		char	pv[5];
380		char	*pp = pv;
381
382		for (pn = n; pn < 1000; pn *= 10)
383			*pp++ = ' ';
384		*pp = 0;
385		(void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
386	}
387	(void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
388
389	if (cur != new && uri_hash_ab[new] != NULL) {
390		cur = new;
391		goto next;
392	}
393	rw_exit(&uri_hash_access);
394}
395
396void
397nl7c_mi_report_uri(mblk_t *mp)
398{
399	uri_hash_t	*hp;
400	uri_desc_t	*uri;
401	uint32_t	cur;
402	uint32_t	new;
403	int		ix;
	int		ret = 0;
405	char		sc;
406
407	rw_enter(&uri_hash_access, RW_READER);
408	cur = uri_hash_which;
409	new = cur ? 0 : 1;
410next:
411	for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
412		hp = &uri_hash_ab[cur][ix];
413		mutex_enter(&hp->lock);
414		uri = hp->list;
415		while (uri != NULL) {
416			sc = *(uri->path.ep);
417			*(uri->path.ep) = 0;
418			ret = mi_mpprintf(mp, "%s: %d %d %d",
419			    uri->path.cp, (int)uri->resplen,
420			    (int)uri->respclen, (int)uri->count);
421			*(uri->path.ep) = sc;
			if (ret == -1)
				break;
423			uri = uri->hash;
424		}
425		mutex_exit(&hp->lock);
		if (ret == -1)
			break;
427	}
428	if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
429		cur = new;
430		goto next;
431	}
432	rw_exit(&uri_hash_access);
433}
434
435/*
436 * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
437 * free all resources contained in the uri_desc_t. Note, the uri_desc_t
438 * will be freed by REF_RELE() on return.
439 */
440
441void
442nl7c_uri_inactive(uri_desc_t *uri)
443{
444	int64_t	 bytes = 0;
445
446	if (uri->tail) {
447		uri_rd_t *rdp = &uri->response;
448		uri_rd_t *free = NULL;
449
450		while (rdp) {
451			if (rdp->off == -1) {
452				bytes += rdp->sz;
453				kmem_free(rdp->data.kmem, rdp->sz);
454			} else {
455				VN_RELE(rdp->data.vnode);
456			}
457			rdp = rdp->next;
458			if (free != NULL) {
459				kmem_cache_free(nl7c_uri_rd_kmc, free);
460			}
461			free = rdp;
462		}
463	}
464	if (bytes) {
465		atomic_add_64(&nl7c_uri_bytes, -bytes);
466	}
467	if (uri->scheme != NULL) {
468		nl7c_http_free(uri->scheme);
469	}
470	if (uri->reqmp) {
471		freeb(uri->reqmp);
472	}
473}
474
475/*
476 * The reclaim is called by the kmem subsystem when kmem is running
 * low. More work is needed to determine the best reclaim policy; for
 * now we just manipulate the nl7c_uri_max global maximum bytes threshold
 * value using a simple arithmetic backoff each time this function is
 * called, then call nl7c_uri_reclaim() to enforce it.
481 *
482 * Note, this value remains in place and enforced for all subsequent
483 * URI request/response processing.
484 *
 * Note, nl7c_uri_max is currently initialized to 0 (i.e. infinite) such
 * that the first call here sets it to the current nl7c_uri_bytes value
 * and then backs off from there.
488 *
489 * XXX how do we determine when to increase nl7c_uri_max ???
490 */
491
492/*ARGSUSED*/
493static void
494uri_kmc_reclaim(void *arg)
495{
496	uint64_t new_max;
497
498	if ((new_max = nl7c_uri_max) == 0) {
499		/* Currently infinite, initialize to current bytes used */
500		nl7c_uri_max = nl7c_uri_bytes;
501		new_max = nl7c_uri_bytes;
502	}
503	if (new_max > 1) {
504		/* Lower max_bytes to 93% of current value */
505		new_max >>= 1;			/* 50% */
506		new_max += (new_max >> 1);	/* 75% */
507		new_max += (new_max >> 2);	/* 93% */
508		if (new_max < nl7c_uri_max)
509			nl7c_uri_max = new_max;
510		else
511			nl7c_uri_max = 1;
512	}
513	nl7c_uri_reclaim();
514}
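
/*
 * Worked example of the backoff above: starting from a max of 1000 bytes
 * the three steps give
 *
 *	new_max >>= 1;			1000 -> 500	(50%)
 *	new_max += (new_max >> 1);	 500 -> 750	(75%)
 *	new_max += (new_max >> 2);	 750 -> 937	(~93%)
 *
 * so each kmem reclaim callback shrinks nl7c_uri_max to roughly 93% of its
 * previous value before nl7c_uri_reclaim() is called to enforce it.
 */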
515
516/*
517 * Delete a uri_desc_t from the URI hash.
518 */
519
520static void
521uri_delete(uri_desc_t *del)
522{
523	uint32_t	hix;
524	uri_hash_t	*hp;
525	uri_desc_t	*uri;
526	uri_desc_t	*puri;
527	uint32_t	cur;
528	uint32_t	new;
529
530	ASSERT(del->hash != URI_TEMP);
531	rw_enter(&uri_hash_access, RW_WRITER);
532	cur = uri_hash_which;
533	new = cur ? 0 : 1;
534next:
535	puri = NULL;
536	hix = del->hvalue;
537	URI_HASH_IX(hix, cur);
538	hp = &uri_hash_ab[cur][hix];
539	for (uri = hp->list; uri != NULL; uri = uri->hash) {
540		if (uri != del) {
541			puri = uri;
542			continue;
543		}
544		/*
545		 * Found the URI, unlink from the hash chain,
546		 * drop locks, ref release it.
547		 */
548		URI_HASH_UNLINK(cur, new, hp, puri, uri);
549		rw_exit(&uri_hash_access);
550		REF_RELE(uri);
551		return;
552	}
553	if (cur != new && uri_hash_ab[new] != NULL) {
554		/*
555		 * Not found in current hash and have a new hash so
556		 * check the new hash next.
557		 */
558		cur = new;
559		goto next;
560	}
561	rw_exit(&uri_hash_access);
562}
563
564/*
565 * Add a uri_desc_t to the URI hash.
566 */
567
568static void
569uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
570{
571	uint32_t	hix;
572	uri_hash_t	*hp;
573	uint32_t	cur = uri_hash_which;
574	uint32_t	new = cur ? 0 : 1;
575
576	/*
577	 * Caller of uri_add() must hold the uri_hash_access rwlock.
578	 */
579	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
580	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
581	/*
582	 * uri_add() always succeeds so add a hash ref to the URI now.
583	 */
584	REF_HOLD(uri);
585again:
586	hix = uri->hvalue;
587	URI_HASH_IX(hix, cur);
588	if (uri_hash_ab[new] == NULL &&
589	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
590		/*
591		 * Easy case, no new hash and current hasn't overflowed,
592		 * add URI to current hash and return.
593		 *
		 * Note, the check of uri_hash_cnt[] above isn't done
		 * atomically, i.e. multiple threads can be in this code
		 * as RW_READER and update the cnt[]; this isn't a problem
		 * as the check is only advisory.
598		 */
599	fast:
600		atomic_inc_32(&uri_hash_cnt[cur]);
601		hp = &uri_hash_ab[cur][hix];
602		mutex_enter(&hp->lock);
603		uri->hash = hp->list;
604		hp->list = uri;
605		mutex_exit(&hp->lock);
606		rw_exit(&uri_hash_access);
607		return;
608	}
609	if (uri_hash_ab[new] == NULL) {
610		/*
611		 * Need a new a or b hash, if not already RW_WRITER
612		 * try to upgrade our lock to writer.
613		 */
614		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
615			/*
			 * Upgrade failed; we can't simply exit and reenter
			 * the lock as after the exit and before the reenter
			 * the whole world can change, so just wait for the
			 * writer lock then do everything again.
620			 */
621			if (nonblocking) {
622				/*
623				 * Can't block, use fast-path above.
624				 *
625				 * XXX should have a background thread to
626				 * handle new ab[] in this case so as to
				 * not overflow the cur hash too much.
628				 */
629				goto fast;
630			}
631			rw_exit(&uri_hash_access);
632			rwlock = RW_WRITER;
633			rw_enter(&uri_hash_access, rwlock);
634			cur = uri_hash_which;
635			new = cur ? 0 : 1;
636			goto again;
637		}
638		rwlock = RW_WRITER;
639		if (uri_hash_ab[new] == NULL) {
640			/*
641			 * Still need a new hash, allocate and initialize
642			 * the new hash.
643			 */
644			uri_hash_n[new] = uri_hash_n[cur] + 1;
645			if (uri_hash_n[new] == 0) {
646				/*
				 * No larger P2Ps[] value so use the current
				 * size, i.e. two hashes of the largest size
				 * are better than one?
649				 */
650				uri_hash_n[new] = uri_hash_n[cur];
651				cmn_err(CE_NOTE, "NL7C: hash index overflow");
652			}
653			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
654			ASSERT(uri_hash_cnt[new] == 0);
655			uri_hash_overflow[new] = uri_hash_sz[new] *
656			    URI_HASH_AVRG;
657			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
658			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
659			    KM_SLEEP);
660			if (uri_hash_ab[new] == NULL) {
661				/*
662				 * Alloc failed, use fast-path above.
663				 *
664				 * XXX should have a background thread to
665				 * handle new ab[] in this case so as to
				 * not overflow the cur hash too much.
667				 */
668				goto fast;
669			}
670			uri_hash_lru[new] = uri_hash_ab[new];
671		}
672	}
673	/*
674	 * Hashed against current hash so migrate any current hash chain
675	 * members, if any.
676	 *
677	 * Note, the hash chain list can be checked for a non empty list
678	 * outside of the hash chain list lock as the hash chain struct
679	 * can't be destroyed while in the uri_hash_access rwlock, worst
680	 * case is that a non empty list is found and after acquiring the
681	 * lock another thread beats us to it (i.e. migrated the list).
682	 */
683	hp = &uri_hash_ab[cur][hix];
684	if (hp->list != NULL) {
685		URI_HASH_MIGRATE(cur, hp, new);
686	}
687	/*
688	 * If new hash has overflowed before current hash has been
689	 * completely migrated then walk all current hash chains and
690	 * migrate list members now.
691	 */
692	if (atomic_inc_32_nv(&uri_hash_cnt[new]) >= uri_hash_overflow[new]) {
693		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
694			hp = &uri_hash_ab[cur][hix];
695			if (hp->list != NULL) {
696				URI_HASH_MIGRATE(cur, hp, new);
697			}
698		}
699	}
700	/*
701	 * Add URI to new hash.
702	 */
703	hix = uri->hvalue;
704	URI_HASH_IX(hix, new);
705	hp = &uri_hash_ab[new][hix];
706	mutex_enter(&hp->lock);
707	uri->hash = hp->list;
708	hp->list = uri;
709	mutex_exit(&hp->lock);
710	/*
711	 * Last, check to see if last cur hash chain has been
712	 * migrated, if so free cur hash and make new hash cur.
713	 */
714	if (uri_hash_cnt[cur] == 0) {
715		/*
		 * If we don't already hold the uri_hash_access rwlock for
		 * RW_WRITER, try to upgrade to RW_WRITER and if successful
		 * check again to see if we still need to do the free.
719		 */
720		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
721		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
722			kmem_free(uri_hash_ab[cur],
723			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
724			uri_hash_ab[cur] = NULL;
725			uri_hash_lru[cur] = NULL;
726			uri_hash_which = new;
727		}
728	}
729	rw_exit(&uri_hash_access);
730}
731
732/*
 * Lookup a uri_desc_t in the URI hash; if found, return the found uri_desc_t
 * with a REF_HOLD() placed on it (the caller frees the request uri_desc_t).
 * Else, if add is B_TRUE, use the request URI to create a new hash entry.
 * Else, if add is B_FALSE, the lookup fails and NULL is returned.
737 */
738
739static uri_desc_t *
740uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
741{
742	uint32_t	hix;
743	uri_hash_t	*hp;
744	uri_desc_t	*uri;
745	uri_desc_t	*puri;
746	uint32_t	cur;
747	uint32_t	new;
748	char		*rcp = ruri->path.cp;
749	char		*rep = ruri->path.ep;
750
751again:
752	rw_enter(&uri_hash_access, RW_READER);
753	cur = uri_hash_which;
754	new = cur ? 0 : 1;
755nexthash:
756	puri = NULL;
757	hix = ruri->hvalue;
758	URI_HASH_IX(hix, cur);
759	hp = &uri_hash_ab[cur][hix];
760	mutex_enter(&hp->lock);
761	for (uri = hp->list; uri != NULL; uri = uri->hash) {
762		char	*ap = uri->path.cp;
763		char	*bp = rcp;
764		char	a, b;
765
766		/* Compare paths */
767		while (bp < rep && ap < uri->path.ep) {
768			if ((a = *ap) == '%') {
769				/* Escaped hex multichar, convert it */
770				H2A(ap, uri->path.ep, a);
771			}
772			if ((b = *bp) == '%') {
773				/* Escaped hex multichar, convert it */
774				H2A(bp, rep, b);
775			}
776			if (a != b) {
777				/* Char's don't match */
778				goto nexturi;
779			}
780			ap++;
781			bp++;
782		}
783		if (bp != rep || ap != uri->path.ep) {
784			/* Not same length */
785			goto nexturi;
786		}
787		ap = uri->auth.cp;
788		bp = ruri->auth.cp;
789		if (ap != NULL) {
790			if (bp == NULL) {
791				/* URI has auth request URI doesn't */
792				goto nexturi;
793			}
794			while (bp < ruri->auth.ep && ap < uri->auth.ep) {
795				if ((a = *ap) == '%') {
796					/* Escaped hex multichar, convert it */
797					H2A(ap, uri->path.ep, a);
798				}
799				if ((b = *bp) == '%') {
800					/* Escaped hex multichar, convert it */
801					H2A(bp, rep, b);
802				}
803				if (a != b) {
804					/* Char's don't match */
805					goto nexturi;
806				}
807				ap++;
808				bp++;
809			}
810			if (bp != ruri->auth.ep || ap != uri->auth.ep) {
811				/* Not same length */
812				goto nexturi;
813			}
814		} else if (bp != NULL) {
815			/* URI doesn't have auth and request URI does */
816			goto nexturi;
817		}
818		/*
		 * Have a path/auth match, so before any other processing
		 * of the requested URI check whether it has expired or the
		 * request specified no-cache, purging it in either case.
822		 */
823		if (uri->expire >= 0 && uri->expire <= ddi_get_lbolt() ||
824		    ruri->nocache) {
825			/*
826			 * URI has expired or request specified to not use
827			 * the cached version, unlink the URI from the hash
828			 * chain, release all locks, release the hash ref
829			 * on the URI, and last look it up again.
830			 *
831			 * Note, this will cause all variants of the named
832			 * URI to be purged.
833			 */
834			if (puri != NULL) {
835				puri->hash = uri->hash;
836			} else {
837				hp->list = uri->hash;
838			}
839			mutex_exit(&hp->lock);
840			atomic_dec_32(&uri_hash_cnt[cur]);
841			rw_exit(&uri_hash_access);
842			if (ruri->nocache)
843				nl7c_uri_purge++;
844			else
845				nl7c_uri_expire++;
846			REF_RELE(uri);
847			goto again;
848		}
849		if (uri->scheme != NULL) {
850			/*
851			 * URI has scheme private qualifier(s), if request
852			 * URI doesn't or if no match skip this URI.
853			 */
854			if (ruri->scheme == NULL ||
855			    ! nl7c_http_cmp(uri->scheme, ruri->scheme))
856				goto nexturi;
857		} else if (ruri->scheme != NULL) {
858			/*
859			 * URI doesn't have scheme private qualifiers but
860			 * request URI does, no match, skip this URI.
861			 */
862			goto nexturi;
863		}
864		/*
865		 * Have a match, ready URI for return, first put a reference
866		 * hold on the URI, if this URI is currently being processed
867		 * then have to wait for the processing to be completed and
868		 * redo the lookup, else return it.
869		 */
870		REF_HOLD(uri);
871		mutex_enter(&uri->proclock);
872		if (uri->proc != NULL) {
873			/* The URI is being processed, wait for completion */
874			mutex_exit(&hp->lock);
875			rw_exit(&uri_hash_access);
876			if (! nonblocking &&
877			    cv_wait_sig(&uri->waiting, &uri->proclock)) {
878				/*
879				 * URI has been processed but things may
880				 * have changed while we were away so do
881				 * most everything again.
882				 */
883				mutex_exit(&uri->proclock);
884				REF_RELE(uri);
885				goto again;
886			} else {
887				/*
				 * Either this is a nonblocking socket, which
				 * can't block waiting for the uri hash hit's
				 * processing to complete, or cv_wait_sig()
				 * was interrupted; in both cases just return
				 * lookup failure.
893				 */
894				mutex_exit(&uri->proclock);
895				REF_RELE(uri);
896				return (NULL);
897			}
898		}
899		mutex_exit(&uri->proclock);
900		uri->hit++;
901		mutex_exit(&hp->lock);
902		rw_exit(&uri_hash_access);
903		return (uri);
904	nexturi:
905		puri = uri;
906	}
907	mutex_exit(&hp->lock);
908	if (cur != new && uri_hash_ab[new] != NULL) {
909		/*
910		 * Not found in current hash and have a new hash so
911		 * check the new hash next.
912		 */
913		cur = new;
914		goto nexthash;
915	}
916add:
917	if (! add) {
918		/* Lookup only so return failure */
919		rw_exit(&uri_hash_access);
920		return (NULL);
921	}
922	/*
	 * URI not hashed, finish initialization of the
924	 * request URI, add it to the hash, return it.
925	 */
926	ruri->hit = 0;
927	ruri->expire = -1;
928	ruri->response.sz = 0;
929	ruri->proc = (struct sonode *)~0;
930	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
931	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
932	uri_add(ruri, RW_READER, nonblocking);
933	/* uri_add() has done rw_exit(&uri_hash_access) */
934	return (ruri);
935}
936
937/*
 * Reclaim URIs until the bytes cached drop below the maximum cache size
 * threshold.
 *
 * A CLOCK based reclaim, modified with a per-URI history (hit) counter.
941 */
942
943static void
944nl7c_uri_reclaim(void)
945{
946	uri_hash_t	*hp, *start, *pend;
947	uri_desc_t	*uri;
948	uri_desc_t	*puri;
949	uint32_t	cur;
950	uint32_t	new;
951
952	nl7c_uri_reclaim_calls++;
953again:
954	rw_enter(&uri_hash_access, RW_WRITER);
955	cur = uri_hash_which;
956	new = cur ? 0 : 1;
957next:
958	hp = uri_hash_lru[cur];
959	start = hp;
960	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
961	while (nl7c_uri_bytes > nl7c_uri_max) {
962		puri = NULL;
963		for (uri = hp->list; uri != NULL; uri = uri->hash) {
964			if (uri->hit != 0) {
965				/*
966				 * Decrement URI activity counter and skip.
967				 */
968				uri->hit--;
969				puri = uri;
970				continue;
971			}
			if (uri->proc != NULL) {
				/*
				 * Currently being processed by a socket,
				 * leave it linked and skip it.
				 */
				puri = uri;
				continue;
			}
978			/*
979			 * Found a candidate, no hit(s) since added or last
			 * reclaim pass, unlink from its hash chain, update
981			 * lru scan pointer, drop lock, ref release it.
982			 */
983			URI_HASH_UNLINK(cur, new, hp, puri, uri);
984			if (cur == uri_hash_which) {
985				if (++hp == pend) {
986					/* Wrap pointer */
987					hp = uri_hash_ab[cur];
988				}
989				uri_hash_lru[cur] = hp;
990			}
991			rw_exit(&uri_hash_access);
992			REF_RELE(uri);
993			nl7c_uri_reclaim_cnt++;
994			goto again;
995		}
996		if (++hp == pend) {
997			/* Wrap pointer */
998			hp = uri_hash_ab[cur];
999		}
1000		if (hp == start) {
1001			if (cur != new && uri_hash_ab[new] != NULL) {
1002				/*
1003				 * Done with the current hash and have a
1004				 * new hash so check the new hash next.
1005				 */
1006				cur = new;
1007				goto next;
1008			}
1009		}
1010	}
1011	rw_exit(&uri_hash_access);
1012}
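
/*
 * The policy above in miniature: a second-chance CLOCK sweep where any
 * entry with a nonzero hit count is aged rather than evicted. This is a
 * hypothetical user-space helper over a flat array (the real code above
 * walks the hash buckets starting at uri_hash_lru[]), shown only to make
 * the eviction rule explicit:
 *
 *	int
 *	clock_evict(int hits[], int n, int *hand)
 *	{
 *		int	scanned;
 *
 *		for (scanned = 0; scanned < 2 * n; scanned++) {
 *			int	ix = *hand;
 *
 *			*hand = (*hand + 1) % n;
 *			if (hits[ix] > 0)
 *				hits[ix]--;
 *			else
 *				return (ix);
 *		}
 *		return (-1);
 *	}
 */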
1013
1014/*
1015 * Called for a socket which is being freed prior to close, e.g. errored.
1016 */
1017
1018void
1019nl7c_urifree(struct sonode *so)
1020{
1021	sotpi_info_t *sti = SOTOTPI(so);
1022	uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1023
1024	sti->sti_nl7c_uri = NULL;
1025	if (uri->hash != URI_TEMP) {
1026		uri_delete(uri);
1027		mutex_enter(&uri->proclock);
1028		uri->proc = NULL;
1029		if (CV_HAS_WAITERS(&uri->waiting)) {
1030			cv_broadcast(&uri->waiting);
1031		}
1032		mutex_exit(&uri->proclock);
1033		nl7c_uri_free++;
1034	} else {
1035		/* No proclock as uri exclusively owned by so */
1036		uri->proc = NULL;
1037		nl7c_uri_temp_free++;
1038	}
1039	REF_RELE(uri);
1040}
1041
1042/*
 * Return values for nl7c_resp_parse():
1044 *
1045 *	< 0	need more data
1046 *
1047 *	  0	parse complete
1048 *
1049 *	> 0	parse error
1050 */
1051
1052volatile uint64_t nl7c_resp_pfail = 0;
1053volatile uint64_t nl7c_resp_ntemp = 0;
1054volatile uint64_t nl7c_resp_pass = 0;
1055
1056static int
1057nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1058{
1059	if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1060		if (data == NULL) {
1061			/* Parse fail */
1062			goto pfail;
1063		}
1064		/* More data */
1065		data = NULL;
1066	} else if (data == NULL) {
1067		goto pass;
1068	}
1069	if (uri->hash != URI_TEMP && uri->nocache) {
1070		/*
1071		 * After response parse now no cache,
1072		 * delete it from cache, wakeup any
1073		 * waiters on this URI, make URI_TEMP.
1074		 */
1075		uri_delete(uri);
1076		mutex_enter(&uri->proclock);
1077		if (CV_HAS_WAITERS(&uri->waiting)) {
1078			cv_broadcast(&uri->waiting);
1079		}
1080		mutex_exit(&uri->proclock);
1081		uri->hash = URI_TEMP;
1082		nl7c_uri_temp_mk++;
1083	}
1084	if (data == NULL) {
1085		/* More data needed */
1086		return (-1);
1087	}
1088	/* Success */
1089	return (0);
1090
1091pfail:
1092	nl7c_resp_pfail++;
1093	return (EINVAL);
1094
1095pass:
1096	nl7c_resp_pass++;
1097	return (ENOTSUP);
1098}
1099
1100/*
 * Called to sink application response data. The processing of the data
 * is the same for a cached or temp URI (i.e. a URI which we aren't
 * going to cache but still want to parse in order to detect the end of
 * the response data, so that for a persistent connection we can parse
 * the next request).
 *
 * Returns 0 on sink success, > 0 on error, and < 0 when there is no
 * so URI (note, in that case the data is not sinked).
1109 */
1110
1111int
1112nl7c_data(struct sonode *so, uio_t *uio)
1113{
1114	sotpi_info_t	*sti = SOTOTPI(so);
1115	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1116	iovec_t		*iov;
1117	int		cnt;
1118	int		sz = uio->uio_resid;
1119	char		*data, *alloc;
1120	char		*bp;
1121	uri_rd_t	*rdp;
1122	boolean_t	first;
1123	int		error, perror;
1124
1125	nl7c_uri_data++;
1126
1127	if (uri == NULL) {
1128		/* Socket & NL7C out of sync, disable NL7C */
1129		sti->sti_nl7c_flags = 0;
1130		nl7c_uri_NULL1++;
1131		return (-1);
1132	}
1133
1134	if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
1135		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1136		first = B_TRUE;
1137	} else {
1138		first = B_FALSE;
1139	}
1140
1141	alloc = kmem_alloc(sz, KM_SLEEP);
1142	URI_RD_ADD(uri, rdp, sz, -1);
1143	if (rdp == NULL) {
1144		error = ENOMEM;
1145		goto fail;
1146	}
1147
1148	if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1149		uri_delete(uri);
1150		uri->hash = URI_TEMP;
1151	}
1152	data = alloc;
1153	alloc = NULL;
1154	rdp->data.kmem = data;
1155	atomic_add_64(&nl7c_uri_bytes, sz);
1156
1157	bp = data;
1158	while (uio->uio_resid > 0) {
1159		iov = uio->uio_iov;
1160		if ((cnt = iov->iov_len) == 0) {
1161			goto next;
1162		}
1163		cnt = MIN(cnt, uio->uio_resid);
1164		error = xcopyin(iov->iov_base, bp, cnt);
1165		if (error)
1166			goto fail;
1167
1168		iov->iov_base += cnt;
1169		iov->iov_len -= cnt;
1170		uio->uio_resid -= cnt;
1171		uio->uio_loffset += cnt;
1172		bp += cnt;
1173	next:
1174		uio->uio_iov++;
1175		uio->uio_iovcnt--;
1176	}
1177
	/* Successful sink of the data, response parse it */
1179	perror = nl7c_resp_parse(so, uri, data, sz);
1180
1181	/* Send the data out the connection */
1182	error = uri_rd_response(so, uri, rdp, first);
1183	if (error)
1184		goto fail;
1185
1186	/* Success */
1187	if (perror == 0 &&
1188	    ((uri->respclen == URI_LEN_NOVALUE &&
1189	    uri->resplen == URI_LEN_NOVALUE) ||
1190	    uri->count >= uri->resplen)) {
1191		/*
1192		 * No more data needed and no pending response
1193		 * data or current data count >= response length
1194		 * so close the URI processing for this so.
1195		 */
1196		nl7c_close(so);
1197		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1198			/* Not a persistent connection */
1199			sti->sti_nl7c_flags = 0;
1200		}
1201	}
1202
1203	return (0);
1204
1205fail:
1206	if (alloc != NULL) {
1207		kmem_free(alloc, sz);
1208	}
1209	sti->sti_nl7c_flags = 0;
1210	nl7c_urifree(so);
1211
1212	return (error);
1213}
1214
1215/*
 * Called to read data from the file "*fp" at offset "*off" of length "*len"
 * for a maximum of "max" bytes.
 *
 * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
 * and "*len" are updated to reflect the actual number of bytes read, and
 * "*ret" is set to 0.
 *
 * Else, "*ret" is set to the error and "NULL" is returned.
1224 */
1225
1226static char *
1227nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1228{
1229	vnode_t	*vp = fp->f_vnode;
1230	int	flg = 0;
1231	size_t	size = MIN(*len, max);
1232	char	*data;
1233	int	error;
1234	uio_t	uio;
1235	iovec_t	iov;
1236
1237	(void) VOP_RWLOCK(vp, flg, NULL);
1238
1239	if (*off > MAXOFFSET_T) {
1240		VOP_RWUNLOCK(vp, flg, NULL);
1241		*ret = EFBIG;
1242		return (NULL);
1243	}
1244
1245	if (*off + size > MAXOFFSET_T)
1246		size = (ssize32_t)(MAXOFFSET_T - *off);
1247
1248	data = kmem_alloc(size, KM_SLEEP);
1249
1250	iov.iov_base = data;
1251	iov.iov_len = size;
1252	uio.uio_loffset = *off;
1253	uio.uio_iov = &iov;
1254	uio.uio_iovcnt = 1;
1255	uio.uio_resid = size;
1256	uio.uio_segflg = UIO_SYSSPACE;
1257	uio.uio_llimit = MAXOFFSET_T;
1258	uio.uio_fmode = fp->f_flag;
1259
1260	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1261	VOP_RWUNLOCK(vp, flg, NULL);
1262	*ret = error;
1263	if (error) {
1264		kmem_free(data, size);
1265		return (NULL);
1266	}
1267	*len = size;
1268	*off += size;
1269	return (data);
1270}
1271
1272/*
1273 * Called to sink application response sendfilev, as with nl7c_data() above
1274 * all the data will be processed by NL7C unless there's an error.
1275 */
1276
1277int
1278nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1279    int sfvc, ssize_t *xfer)
1280{
1281	sotpi_info_t	*sti = SOTOTPI(so);
1282	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1283	file_t		*fp = NULL;
1284	vnode_t		*vp = NULL;
1285	char		*data = NULL;
1286	u_offset_t	off;
1287	int		len;
1288	int		cnt;
1289	int		total_count = 0;
	char		*alloc = NULL;
1291	uri_rd_t	*rdp;
1292	int		max;
1293	int		perror;
1294	int		error = 0;
1295	boolean_t	first = B_TRUE;
1296
1297	nl7c_uri_sendfilev++;
1298
1299	if (uri == NULL) {
1300		/* Socket & NL7C out of sync, disable NL7C */
1301		sti->sti_nl7c_flags = 0;
1302		nl7c_uri_NULL2++;
1303		return (0);
1304	}
1305
1306	if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
1307		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1308
1309	while (sfvc-- > 0) {
1310		/*
1311		 * off - the current sfv read file offset or user address.
1312		 *
1313		 * len - the current sfv length in bytes.
1314		 *
1315		 * cnt - number of bytes kmem_alloc()ed.
1316		 *
1317		 * alloc - the kmem_alloc()ed buffer of size "cnt".
1318		 *
1319		 * data - copy of "alloc" used for post alloc references.
1320		 *
1321		 * fp - the current sfv file_t pointer.
1322		 *
1323		 * vp - the current "*vp" vnode_t pointer.
1324		 *
1325		 * Note, for "data" and "fp" and "vp" a NULL value is used
1326		 * when not allocated such that the common failure path "fail"
1327		 * is used.
1328		 */
1329		off = sfvp->sfv_off;
1330		len = sfvp->sfv_len;
1331		cnt = len;
1332
1333		if (len == 0) {
1334			sfvp++;
1335			continue;
1336		}
1337
1338		if (sfvp->sfv_fd == SFV_FD_SELF) {
1339			/*
1340			 * User memory, copyin() all the bytes.
1341			 */
1342			alloc = kmem_alloc(cnt, KM_SLEEP);
1343			error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1344			if (error)
1345				goto fail;
1346		} else {
1347			/*
1348			 * File descriptor, prefetch some bytes.
1349			 */
1350			if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1351				error = EBADF;
1352				goto fail;
1353			}
1354			if ((fp->f_flag & FREAD) == 0) {
1355				error = EACCES;
1356				goto fail;
1357			}
1358			vp = fp->f_vnode;
1359			if (vp->v_type != VREG) {
1360				error = EINVAL;
1361				goto fail;
1362			}
1363			VN_HOLD(vp);
1364
1365			/* Read max_rem bytes from file for prefetch */
1366			if (nl7c_use_kmem) {
1367				max = cnt;
1368			} else {
1369				max = MAXBSIZE * nl7c_file_prefetch;
1370			}
1371			alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1372			if (alloc == NULL)
1373				goto fail;
1374
1375			releasef(sfvp->sfv_fd);
1376			fp = NULL;
1377		}
1378		URI_RD_ADD(uri, rdp, cnt, -1);
1379		if (rdp == NULL) {
1380			error = ENOMEM;
1381			goto fail;
1382		}
1383		data = alloc;
1384		alloc = NULL;
1385		rdp->data.kmem = data;
1386		total_count += cnt;
1387		if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1388			uri_delete(uri);
1389			uri->hash = URI_TEMP;
1390		}
1391
1392		/* Response parse */
1393		perror = nl7c_resp_parse(so, uri, data, len);
1394
1395		/* Send kmem data out the connection */
1396		error = uri_rd_response(so, uri, rdp, first);
1397
1398		if (error)
1399			goto fail;
1400
1401		if (sfvp->sfv_fd != SFV_FD_SELF) {
1402			/*
1403			 * File descriptor, if any bytes left save vnode_t.
1404			 */
1405			if (len > cnt) {
1406				/* More file data so add it */
1407				URI_RD_ADD(uri, rdp, len - cnt, off);
1408				if (rdp == NULL) {
1409					error = ENOMEM;
1410					goto fail;
1411				}
1412				rdp->data.vnode = vp;
1413
1414				/* Send vnode data out the connection */
1415				error = uri_rd_response(so, uri, rdp, first);
1416			} else {
1417				/* All file data fit in the prefetch */
1418				VN_RELE(vp);
1419			}
1420			*fileoff += len;
1421			vp = NULL;
1422		}
1423		*xfer += len;
1424		sfvp++;
1425
1426		if (first)
1427			first = B_FALSE;
1428	}
1429	if (total_count > 0) {
1430		atomic_add_64(&nl7c_uri_bytes, total_count);
1431	}
1432	if (perror == 0 &&
1433	    ((uri->respclen == URI_LEN_NOVALUE &&
1434	    uri->resplen == URI_LEN_NOVALUE) ||
1435	    uri->count >= uri->resplen)) {
1436		/*
1437		 * No more data needed and no pending response
1438		 * data or current data count >= response length
1439		 * so close the URI processing for this so.
1440		 */
1441		nl7c_close(so);
1442		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1443			/* Not a persistent connection */
1444			sti->sti_nl7c_flags = 0;
1445		}
1446	}
1447
1448	return (0);
1449
1450fail:
1451	if (error == EPIPE)
1452		tsignal(curthread, SIGPIPE);
1453
1454	if (alloc != NULL)
		kmem_free(alloc, cnt);
1456
1457	if (vp != NULL)
1458		VN_RELE(vp);
1459
1460	if (fp != NULL)
1461		releasef(sfvp->sfv_fd);
1462
1463	if (total_count > 0) {
1464		atomic_add_64(&nl7c_uri_bytes, total_count);
1465	}
1466
1467	sti->sti_nl7c_flags = 0;
1468	nl7c_urifree(so);
1469
1470	return (error);
1471}
1472
1473/*
1474 * Called for a socket which is closing or when an application has
1475 * completed sending all the response data (i.e. for a persistent
1476 * connection called once for each completed application response).
1477 */
1478
1479void
1480nl7c_close(struct sonode *so)
1481{
1482	sotpi_info_t	*sti = SOTOTPI(so);
1483	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1484
1485	if (uri == NULL) {
1486		/*
1487		 * No URI being processed so might be a listen()er
1488		 * if so do any cleanup, else nothing more to do.
1489		 */
1490		if (so->so_state & SS_ACCEPTCONN) {
1491			(void) nl7c_close_addr(so);
1492		}
1493		return;
1494	}
1495	sti->sti_nl7c_uri = NULL;
1496	if (uri->hash != URI_TEMP) {
1497		mutex_enter(&uri->proclock);
1498		uri->proc = NULL;
1499		if (CV_HAS_WAITERS(&uri->waiting)) {
1500			cv_broadcast(&uri->waiting);
1501		}
1502		mutex_exit(&uri->proclock);
1503		nl7c_uri_close++;
1504	} else {
1505		/* No proclock as uri exclusively owned by so */
1506		uri->proc = NULL;
1507		nl7c_uri_temp_close++;
1508	}
1509	REF_RELE(uri);
1510	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1511		nl7c_uri_reclaim();
1512	}
1513}
1514
1515/*
1516 * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1517 * release the segmap mapping. Note, the uri_segmap_t will be freed by
1518 * REF_RELE() on return.
1519 */
1520
1521void
1522uri_segmap_inactive(uri_segmap_t *smp)
1523{
1524	if (!segmap_kpm) {
1525		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
1526		    smp->len, F_SOFTUNLOCK, S_OTHER);
1527	}
1528	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1529	VN_RELE(smp->vp);
1530}
1531
1532/*
 * The call-back for desballoc()ed mblk_t's: if a segmap mapped mblk_t,
 * release the reference (one per desballoc() of a segmap page); if a rd_t
 * mapped mblk_t, release the reference (one per desballoc() of a
 * uri_desc_t); last, kmem free the uri_desb_t.
1537 */
1538
1539static void
1540uri_desb_free(uri_desb_t *desb)
1541{
1542	if (desb->segmap != NULL) {
1543		REF_RELE(desb->segmap);
1544	}
1545	REF_RELE(desb->uri);
1546	kmem_cache_free(uri_desb_kmc, desb);
1547}
1548
1549/*
1550 * Segmap map up to a page of a uri_rd_t file descriptor.
1551 */
1552
1553uri_segmap_t *
1554uri_segmap_map(uri_rd_t *rdp, int bytes)
1555{
1556	uri_segmap_t	*segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1557	int		len = MIN(rdp->sz, MAXBSIZE);
1558
1559	if (len > bytes)
1560		len = bytes;
1561
1562	REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1563	segmap->len = len;
1564	VN_HOLD(rdp->data.vnode);
1565	segmap->vp = rdp->data.vnode;
1566
1567	segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1568	    segmap_kpm ? SM_FAULT : 0, S_READ);
1569
1570	if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1571	    F_SOFTLOCK, S_READ) != 0) {
1572		REF_RELE(segmap);
1573		return (NULL);
1574	}
1575	return (segmap);
1576}
1577
1578/*
 * Chop up the kernel virtual memory area *data of size *sz bytes, for
 * a maximum of *bytes bytes, into a desballoc()ed mblk_t chain using
 * the given template uri_desb_t *temp with at most max_mblk bytes per
 * mblk_t.
 *
 * The values of *data, *sz, and *bytes are updated on return and the
 * mblk_t chain is returned.
1585 */
1586
1587static mblk_t *
1588uri_desb_chop(char **data, size_t *sz, int *bytes, uri_desb_t *temp,
1589    int max_mblk, char *eoh, mblk_t *persist)
1590{
1591	char		*ldata = *data;
1592	size_t		lsz = *sz;
1593	int		lbytes = bytes ? *bytes : lsz;
1594	uri_desb_t	*desb;
1595	mblk_t		*mp = NULL;
1596	mblk_t		*nmp, *pmp = NULL;
1597	int		msz;
1598
1599	if (lbytes == 0 && lsz == 0)
1600		return (NULL);
1601
1602	while (lbytes > 0 && lsz > 0) {
1603		msz = MIN(lbytes, max_mblk);
1604		msz = MIN(msz, lsz);
1605		if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1606			msz = (eoh - ldata);
1607			pmp = persist;
1608			persist = NULL;
1609			if (msz == 0) {
1610				nmp = pmp;
1611				pmp = NULL;
1612				goto zero;
1613			}
1614		}
1615		desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1616		REF_HOLD(temp->uri);
1617		if (temp->segmap) {
1618			REF_HOLD(temp->segmap);
1619		}
1620		bcopy(temp, desb, sizeof (*desb));
1621		desb->frtn.free_arg = (caddr_t)desb;
1622		nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1623		if (nmp == NULL) {
1624			if (temp->segmap) {
1625				REF_RELE(temp->segmap);
1626			}
1627			REF_RELE(temp->uri);
1628			if (mp != NULL) {
1629				mp->b_next = NULL;
1630				freemsg(mp);
1631			}
1632			if (persist != NULL) {
1633				freeb(persist);
1634			}
1635			return (NULL);
1636		}
1637		nmp->b_wptr += msz;
1638	zero:
1639		if (mp != NULL) {
1640			mp->b_next->b_cont = nmp;
1641		} else {
1642			mp = nmp;
1643		}
1644		if (pmp != NULL) {
1645			nmp->b_cont = pmp;
1646			nmp = pmp;
1647			pmp = NULL;
1648		}
1649		mp->b_next = nmp;
1650		ldata += msz;
1651		lsz -= msz;
1652		lbytes -= msz;
1653	}
1654	*data = ldata;
1655	*sz = lsz;
1656	if (bytes)
1657		*bytes = lbytes;
1658	return (mp);
1659}
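
/*
 * Note, in the chain returned above mp->b_next is (re)used as a tail
 * pointer so a caller can append cheaply; a minimal sketch of the intended
 * use, mirroring uri_rd_response() below:
 *
 *	if (wmp == NULL) {
 *		wmp = mp;
 *	} else {
 *		wmp->b_next->b_cont = mp;
 *		wmp->b_next = mp->b_next;
 *		mp->b_next = NULL;
 *	}
 *	...
 *	wmp->b_next = NULL;	before handing the chain to kstrwritemp()
 */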
1660
1661/*
 * Experimental noqwait (i.e. no canput()/qwait() checks), just send
1663 * the entire mblk_t chain down without flow-control checks.
1664 */
1665
1666static int
1667kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1668{
1669	struct stdata *stp;
1670	int error = 0;
1671
1672	ASSERT(vp->v_stream);
1673	stp = vp->v_stream;
1674
1675	/* Fast check of flags before acquiring the lock */
1676	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1677		mutex_enter(&stp->sd_lock);
1678		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1679		mutex_exit(&stp->sd_lock);
1680		if (error != 0) {
1681			if (!(stp->sd_flag & STPLEX) &&
1682			    (stp->sd_wput_opt & SW_SIGPIPE)) {
1683				error = EPIPE;
1684			}
1685			return (error);
1686		}
1687	}
1688	putnext(stp->sd_wrq, mp);
1689	return (0);
1690}
1691
1692/*
1693 * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1694 */
1695
1696static int
1697uri_rd_response(struct sonode *so,
1698    uri_desc_t *uri,
1699    uri_rd_t *rdp,
1700    boolean_t first)
1701{
1702	vnode_t		*vp = SOTOV(so);
1703	int		max_mblk = (int)vp->v_stream->sd_maxblk;
1704	int		wsz;
1705	mblk_t		*mp, *wmp, *persist;
1706	int		write_bytes;
1707	uri_rd_t	rd;
1708	uri_desb_t	desb;
1709	uri_segmap_t	*segmap = NULL;
1710	char		*segmap_data;
1711	size_t		segmap_sz;
1712	int		error;
1713	int		fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1714	    ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1715
1716
1717	/* Initialize template uri_desb_t */
1718	desb.frtn.free_func = uri_desb_free;
1719	desb.frtn.free_arg = NULL;
1720	desb.uri = uri;
1721
1722	/* Get a local copy of the rd_t */
1723	bcopy(rdp, &rd, sizeof (rd));
1724	do {
1725		if (first) {
1726			/*
			 * For the first kstrwrite() send enough data to
			 * get things going; note the nonblocking version
			 * of kstrwrite() will be used below.
1730			 */
1731			write_bytes = P2ROUNDUP((max_mblk * 4),
1732			    MAXBSIZE * nl7c_file_prefetch);
1733		} else {
1734			if ((write_bytes = so->so_sndbuf) == 0)
1735				write_bytes = vp->v_stream->sd_qn_maxpsz;
1736			ASSERT(write_bytes > 0);
1737			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1738		}
1739		/*
1740		 * Chop up to a write_bytes worth of data.
1741		 */
1742		wmp = NULL;
1743		wsz = write_bytes;
1744		do {
1745			if (rd.sz == 0)
1746				break;
1747			if (rd.off == -1) {
1748				if (uri->eoh >= rd.data.kmem &&
1749				    uri->eoh < &rd.data.kmem[rd.sz]) {
1750					persist = nl7c_http_persist(so);
1751				} else {
1752					persist = NULL;
1753				}
1754				desb.segmap = NULL;
1755				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1756				    &wsz, &desb, max_mblk, uri->eoh, persist);
1757				if (mp == NULL) {
1758					error = ENOMEM;
1759					goto invalidate;
1760				}
1761			} else {
1762				if (segmap == NULL) {
1763					segmap = uri_segmap_map(&rd,
1764					    write_bytes);
1765					if (segmap == NULL) {
1766						error = ENOMEM;
1767						goto invalidate;
1768					}
1769					desb.segmap = segmap;
1770					segmap_data = segmap->base;
1771					segmap_sz = segmap->len;
1772				}
1773				mp = uri_desb_chop(&segmap_data, &segmap_sz,
1774				    &wsz, &desb, max_mblk, NULL, NULL);
1775				if (mp == NULL) {
1776					error = ENOMEM;
1777					goto invalidate;
1778				}
1779				if (segmap_sz == 0) {
1780					rd.sz -= segmap->len;
1781					rd.off += segmap->len;
1782					REF_RELE(segmap);
1783					segmap = NULL;
1784				}
1785			}
1786			if (wmp == NULL) {
1787				wmp = mp;
1788			} else {
1789				wmp->b_next->b_cont = mp;
1790				wmp->b_next = mp->b_next;
1791				mp->b_next = NULL;
1792			}
1793		} while (wsz > 0 && rd.sz > 0);
1794
1795		wmp->b_next = NULL;
1796		if (first) {
1797			/* First kstrwrite(), use noqwait */
1798			if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1799				goto invalidate;
1800			/*
1801			 * For the rest of the kstrwrite()s use SO_SNDBUF
1802			 * worth of data at a time, note these kstrwrite()s
1803			 * may (will) block one or more times.
1804			 */
1805			first = B_FALSE;
1806		} else {
1807			if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1808				if (error == EAGAIN) {
1809					nl7c_uri_rd_EAGAIN++;
1810					if ((error =
1811					    kstrwritempnoqwait(vp, wmp)) != 0)
1812						goto invalidate;
1813				} else
1814					goto invalidate;
1815			}
1816		}
1817	} while (rd.sz > 0);
1818
1819	return (0);
1820
1821invalidate:
1822	if (segmap) {
1823		REF_RELE(segmap);
1824	}
1825	if (wmp)
1826		freemsg(wmp);
1827
1828	return (error);
1829}
1830
1831/*
1832 * Send the URI uri_desc_t *uri response out the socket_t *so.
1833 */
1834
1835static int
1836uri_response(struct sonode *so, uri_desc_t *uri)
1837{
1838	uri_rd_t	*rdp = &uri->response;
1839	boolean_t	first = B_TRUE;
1840	int		error;
1841
1842	while (rdp != NULL) {
1843		error = uri_rd_response(so, uri, rdp, first);
1844		if (error != 0) {
1845			goto invalidate;
1846		}
1847		first = B_FALSE;
1848		rdp = rdp->next;
1849	}
1850	return (0);
1851
1852invalidate:
1853	if (uri->hash != URI_TEMP)
1854		uri_delete(uri);
1855	return (error);
1856}
1857
1858/*
1859 * The pchars[] array is indexed by a char to determine if it's a
1860 * valid URI path component chararcter where:
1861 *
1862 *    pchar       = unreserved | escaped |
1863 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
1864 *
1865 *    unreserved  = alphanum | mark
1866 *
1867 *    alphanum    = alpha | digit
1868 *
1869 *    alpha       = lowalpha | upalpha
1870 *
1871 *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
1872 *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
1873 *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
1874 *                  "y" | "z"
1875 *
1876 *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
1877 *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
1878 *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
1879 *                  "Y" | "Z"
1880 *
1881 *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
1882 *                  "8" | "9"
1883 *
1884 *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
1885 *
1886 *    escaped     = "%" hex hex
1887 *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
1888 *                  "a" | "b" | "c" | "d" | "e" | "f"
1889 */
1890
1891static char pchars[] = {
1892    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
1893    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
1894    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
1895    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
1896    0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
1897    0, 0, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
1898    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
1899    1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
1900    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
1901    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
1902    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
1903    1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
1904    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
1905    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
1906    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
1907    1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
1908};
1909
1910#define	PCHARS_MASK 0x7F
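
/*
 * A minimal sketch (hypothetical helper, not part of this file) of how the
 * pchars[] table is meant to be indexed:
 *
 *	static boolean_t
 *	is_pchar(char c)
 *	{
 *		return (pchars[c & PCHARS_MASK] != 0);
 *	}
 */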
1911
1912/*
 * This is the main L7 request message parse; we are called each time
 * new data is available for a socket, and each time a single buffer of
 * the entire message to date is given.
1916 *
 * Here we parse the request looking for the URI, parse it, and if it is
 * a supported scheme call the scheme parser to complete the parse of any
 * headers which may further qualify the identity of the requested object,
 * then look it up in the URI hash.
1921 *
1922 * Return B_TRUE for more processing.
1923 *
1924 * Note, at this time the parser supports the generic message format as
 * specified in RFC 822 with potential limitations as specified in RFC
1926 * 2616 for HTTP messages.
1927 *
 * Note, the caller supports an mblk_t chain, but for now the parser(s)
 * require the complete header in a single mblk_t. This is the common
 * case, certainly for high performance environments; if at a future
 * date mblk_t chains become important the parser can be revised to
 * process mblk_t chains.
1933 */
1934
1935boolean_t
1936nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1937{
1938	sotpi_info_t *sti = SOTOTPI(so);
1939	char	*cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
1940	char	*ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
1941	char	*get = "GET ";
1942	char	*post = "POST ";
1943	char	c;
1944	char	*uris;
1945	uri_desc_t *uri = NULL;
1946	uri_desc_t *ruri = NULL;
1947	mblk_t	*reqmp;
1948	uint32_t hv = 0;
1949
1950	if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
1951		nl7c_uri_pass_dupbfail++;
1952		goto pass;
1953	}
1954	/*
	 * Allocate and initialize minimal state for the request
	 * uri_desc_t; in the cache hit case this uri_desc_t will
	 * be freed.
1958	 */
1959	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1960	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1961	uri->hash = NULL;
1962	uri->tail = NULL;
1963	uri->scheme = NULL;
1964	uri->count = 0;
1965	uri->reqmp = reqmp;
1966
1967	/*
1968	 * Set request time to current time.
1969	 */
1970	sti->sti_nl7c_rtime = gethrestime_sec();
1971
1972	/*
1973	 * Parse the Request-Line for the URI.
1974	 *
	 * For backwards HTTP version compatibility reasons skip any leading
	 * CRLF (or CR or LF) line terminator(s) preceding the Request-Line.
1977	 */
1978	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1979		cp++;
1980	}
1981	while (cp < ep && *get == *cp) {
1982		get++;
1983		cp++;
1984	}
1985	if (*get != 0) {
		/* Not a "GET", check for "POST" */
1987		while (cp < ep && *post == *cp) {
1988			post++;
1989			cp++;
1990		}
1991		if (*post != 0) {
1992			if (cp == ep) {
1993				nl7c_uri_more_get++;
1994				goto more;
1995			}
1996			/* Not a "GET" or a "POST", just pass */
1997			nl7c_uri_pass_method++;
1998			goto pass;
1999		}
2000		/* "POST", don't cache but still may want to parse */
2001		uri->hash = URI_TEMP;
2002	}
2003	/*
2004	 * Skip over URI path char(s) and save start and past end pointers.
2005	 */
2006	uris = cp;
2007	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
2008		if (c == '?') {
2009			/* Don't cache but still may want to parse */
2010			uri->hash = URI_TEMP;
2011		}
2012		CHASH(hv, c);
2013		cp++;
2014	}
2015	if (c != '\r' && cp == ep) {
2016		nl7c_uri_more_eol++;
2017		goto more;
2018	}
2019	/*
2020	 * Request-Line URI parsed, pass the rest of the request on
	 * to the http scheme parse.
2022	 */
2023	uri->path.cp = uris;
2024	uri->path.ep = cp;
2025	uri->hvalue = hv;
2026	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2027		/*
		 * Parse not successful or pass on the request; the parse
		 * pointer "cp" is overloaded such that non NULL means more
		 * data is needed and NULL means a bad parse or pass.
2031		 */
2032		if (cp != NULL) {
2033			nl7c_uri_more_http++;
2034			goto more;
2035		}
2036		nl7c_uri_pass_http++;
2037		goto pass;
2038	}
2039	if (uri->nocache) {
2040		uri->hash = URI_TEMP;
2041		(void) uri_lookup(uri, B_FALSE, nonblocking);
2042	} else if (uri->hash == URI_TEMP) {
2043		uri->nocache = B_TRUE;
2044		(void) uri_lookup(uri, B_FALSE, nonblocking);
2045	}
2046
2047	if (uri->hash == URI_TEMP) {
2048		if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
2049			/* Temporary URI so skip hash processing */
2050			nl7c_uri_request++;
2051			nl7c_uri_temp++;
2052			goto temp;
2053		}
2054		/* Not persistent so not interested in the response */
2055		nl7c_uri_pass_temp++;
2056		goto pass;
2057	}
2058	/*
2059	 * Check the URI hash for a cached response, save the request
2060	 * uri in case we need it below.
2061	 */
2062	ruri = uri;
2063	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2064		/*
2065		 * Failed to lookup due to nonblocking wait required,
2066		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2067		 * failure, ... Just pass on this request.
2068		 */
2069		nl7c_uri_pass_addfail++;
2070		goto pass;
2071	}
2072	nl7c_uri_request++;
2073	if (uri->response.sz > 0) {
2074		/*
2075		 * We have the response cached, update recv mblk rptr
2076		 * to reflect the data consumed in parse.
2077		 */
2078		mblk_t	*mp = sti->sti_nl7c_rcv_mp;
2079
2080		if (cp == (char *)mp->b_wptr) {
2081			sti->sti_nl7c_rcv_mp = mp->b_cont;
2082			mp->b_cont = NULL;
2083			freeb(mp);
2084		} else {
2085			mp->b_rptr = (unsigned char *)cp;
2086		}
2087		nl7c_uri_hit++;
2088		/* If logging enabled log request */
2089		if (nl7c_logd_enabled) {
2090			ipaddr_t faddr;
2091
2092			if (so->so_family == AF_INET) {
2093				/* Only support IPv4 addrs */
2094				faddr = ((struct sockaddr_in *)
				    sti->sti_faddr_sa)->sin_addr.s_addr;
2096			} else {
2097				faddr = 0;
2098			}
2099			/* XXX need to pass response type, e.g. 200, 304 */
2100			nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
2101		}
2102
2103		/* If conditional request check for substitute response */
2104		if (ruri->conditional) {
2105			uri = nl7c_http_cond(ruri, uri);
2106		}
2107
2108		/*
2109		 * Release reference on request URI, send the response out
2110		 * the socket, release reference on response uri, set the
2111		 * *ret value to B_TRUE to indicate request was consumed
		 * then return B_FALSE to indicate no more data is needed.
2113		 */
2114		REF_RELE(ruri);
2115		(void) uri_response(so, uri);
2116		REF_RELE(uri);
2117		*ret = B_TRUE;
2118		return (B_FALSE);
2119	}
2120	/*
	 * Cache miss; the request URI has been added to the cache and is
	 * waiting for application write-side data to fill it.
2123	 */
2124	nl7c_uri_miss++;
2125temp:
2126	/*
2127	 * A miss or temp URI for which response data is needed, link
2128	 * uri to so and so to uri, set WAITWRITE in the so such that
2129	 * read-side processing is suspended (so the next read() gets
2130	 * the request data) until a write() is processed by NL7C.
2131	 *
2132	 * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
2133	 */
2134	uri->proc = so;
2135	sti->sti_nl7c_uri = uri;
2136	sti->sti_nl7c_flags |= NL7C_WAITWRITE;
2137	*ret = B_FALSE;
2138	return (B_FALSE);
2139
2140more:
2141	/* More data is needed, note fragmented recv not supported */
2142	nl7c_uri_more++;
2143
2144pass:
2145	/* Pass on this request */
2146	nl7c_uri_pass++;
2147	nl7c_uri_request++;
2148	if (ruri != NULL) {
2149		REF_RELE(ruri);
2150	}
2151	if (uri) {
2152		REF_RELE(uri);
2153	}
2154	sti->sti_nl7c_flags = 0;
2155	*ret = B_FALSE;
2156	return (B_FALSE);
2157}
2158