18d499c80SGordon Ross /*
28d499c80SGordon Ross  * This file and its contents are supplied under the terms of the
38d499c80SGordon Ross  * Common Development and Distribution License ("CDDL"), version 1.0.
48d499c80SGordon Ross  * You may only use this file in accordance with the terms of version
58d499c80SGordon Ross  * 1.0 of the CDDL.
68d499c80SGordon Ross  *
78d499c80SGordon Ross  * A full copy of the text of the CDDL should have accompanied this
88d499c80SGordon Ross  * source.  A copy of the CDDL is also available via the Internet at
98d499c80SGordon Ross  * http://www.illumos.org/license/CDDL.
108d499c80SGordon Ross  */
118d499c80SGordon Ross 
128d499c80SGordon Ross /*
13ba55de6bSGordon Ross  * Copyright 2018-2021 Tintri by DDN, Inc.  All rights reserved.
1408f2ce59SGordon Ross  * Copyright 2022 RackTop Systems, Inc.
158d499c80SGordon Ross  */
168d499c80SGordon Ross 
178d499c80SGordon Ross /*
188d499c80SGordon Ross  * Support functions for smb2_ioctl/fsctl codes:
19*f88ed57cSGordon Ross  * FSCTL_OFFLOAD_READ
20*f88ed57cSGordon Ross  * FSCTL_OFFLOAD_WRITE
218d499c80SGordon Ross  * (and related)
228d499c80SGordon Ross  */
238d499c80SGordon Ross 
248d499c80SGordon Ross #include <smbsrv/smb2_kproto.h>
258d499c80SGordon Ross #include <smbsrv/smb_fsops.h>
268d499c80SGordon Ross #include <smb/winioctl.h>
278d499c80SGordon Ross 
/*
 * Summary of how offload data transfer works:
 *
 * The client drives a server-side copy.  Outline:
 * 1: open src_file
 * 2: create dst_file and set its size
 * 3: while src_file not all copied {
 *        offload_read(src_file, &token);
 *        while token not all copied {
 *            offload_write(dst_file, token);
 *        }
 *    }
 *
 * Each "offload read" request returns a "token" representing some
 * portion of the source file.  The server decides what kind of
 * token to use, and how much of the source file it should cover.
 * The length represented may be less than the client requested.
 * No data are copied during offload_read (just meta-data).
 *
 * Each "offload write" request copies some portion of the data
 * represented by the "token" into the output file.  The amount
 * of data copied may be less than the client requested, and the
 * client keeps sending offload write requests until they have
 * copied all the data represented by the current token.
 */
538d499c80SGordon Ross 
/*
 * Flag bit returned in the FSCTL_OFFLOAD_READ reply:
 * [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE
 */
#define	OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND	1

/*
 * [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN
 * Note reserved: 0xFFFF0002 - 0xFFFFFFFF
 *
 * ...TOKEN_TYPE_ZERO_DATA:  A well-known Token that indicates ...
 * (offload write should just zero to the destination)
 * The payload (tok_other) is ignored with this type.
 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA	0xFFFF0001

/* Our vendor-specific token type: struct tok_native1 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1	0x10001

/*
 * Size of a whole token, and of the payload that follows the
 * 8-byte token header (4-byte type, 2-byte reserved, 2-byte length).
 */
#define	TOKEN_TOTAL_SIZE	512
#define	TOKEN_MAX_PAYLOAD	504	/* 512 - 8 */

/* This mask is for sanity checking offsets etc. */
#define	OFFMASK		((uint64_t)DEV_BSIZE-1)
758d499c80SGordon Ross 
768d499c80SGordon Ross typedef struct smb_odx_token {
778d499c80SGordon Ross 	uint32_t	tok_type;	/* big-endian on the wire */
788d499c80SGordon Ross 	uint16_t	tok_reserved;	/* zero */
798d499c80SGordon Ross 	uint16_t	tok_len;	/* big-endian on the wire */
808d499c80SGordon Ross 	union {
818d499c80SGordon Ross 		uint8_t u_tok_other[TOKEN_MAX_PAYLOAD];
828d499c80SGordon Ross 		struct tok_native1 {
838d499c80SGordon Ross 			smb2fid_t	tn1_fid;
848d499c80SGordon Ross 			uint64_t	tn1_off;
858d499c80SGordon Ross 			uint64_t	tn1_eof;
86ba55de6bSGordon Ross 			uint32_t	tn1_tid;
878d499c80SGordon Ross 		} u_tok_native1;
888d499c80SGordon Ross 	} tok_u;
898d499c80SGordon Ross } smb_odx_token_t;
908d499c80SGordon Ross 
/*
 * Arguments shared by smb2_fsctl_odx_write() and its per-token-type
 * handlers.  The in_* members are decoded from the request (in wire
 * order), the out_* members are encoded into the reply, and wa_eof
 * is the destination file size fetched before the I/O is run.
 */
typedef struct odx_write_args {
	uint32_t in_struct_size;	/* must be TOKEN_TOTAL_SIZE + 32 */
	uint32_t in_flags;
	uint64_t in_dstoff;		/* destination file offset */
	uint64_t in_xlen;		/* requested transfer length */
	uint64_t in_xoff;		/* transfer offset (third "q" of input) */
	uint32_t out_struct_size;	/* set to 16 by the caller */
	uint32_t out_flags;
	uint64_t out_xlen;		/* length actually transferred */
	uint64_t wa_eof;		/* destination file size */
} odx_write_args_t;
1028d499c80SGordon Ross 
1038d499c80SGordon Ross static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *);
1048d499c80SGordon Ross static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *);
1058d499c80SGordon Ross static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *);
1068d499c80SGordon Ross static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *);
1078d499c80SGordon Ross 
1088d499c80SGordon Ross static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *);
1098d499c80SGordon Ross static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *,
1108d499c80SGordon Ross     odx_write_args_t *, smb_odx_token_t *);
1118d499c80SGordon Ross 
1128d499c80SGordon Ross 
/* We can disable this feature for testing etc. */
int smb2_odx_enable = 1;

/*
 * These two variables determine the intervals of offload_read and
 * offload_write calls (respectively) during an offload copy.
 *
 * For the offload read token we could offer a token representing
 * the whole file, but we'll have the client come back for a new
 * "token" after each 256M so we have a chance to look for "holes".
 * This lets us use the special "zero" token while we're in any
 * un-allocated parts of the file, so offload_write can use the
 * (more efficient) smb_fsop_freesp instead of copying.
 *
 * We limit the size of offload_write to 16M per request so we
 * don't end up taking so long with I/O that the client might
 * time out the request.  Keep: write_max <= read_max
 */
uint32_t smb2_odx_read_max = (1<<28); /* 256M */
uint32_t smb2_odx_write_max = (1<<24); /* 16M */

/*
 * This buffer size determines the I/O size for the copy during
 * offload write, where it will read/write using this buffer.
 * Note: We kmem_alloc this, so don't make it HUGE.  It only
 * needs to be large enough to allow the copy to proceed with
 * reasonable efficiency.  1M is currently the largest possible
 * block size with ZFS, so that's what we'll use here.
 *
 * Actually, limit this to kmem_max_cached, to avoid contention
 * allocating from kmem_oversize_arena.
 */
uint32_t smb2_odx_buf_size = (1<<17); /* 128k */
1468d499c80SGordon Ross 
1478d499c80SGordon Ross 
1488d499c80SGordon Ross /*
1498d499c80SGordon Ross  * FSCTL_OFFLOAD_READ
1508d499c80SGordon Ross  * [MS-FSCC] 2.3.77
1518d499c80SGordon Ross  *
1528d499c80SGordon Ross  * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY
1538d499c80SGordon Ross  *
1548d499c80SGordon Ross  * The returned data is an (opaque to the client) 512-byte "token"
1558d499c80SGordon Ross  * that represents the specified range (offset, length) of the
1568d499c80SGordon Ross  * source file.  The "token" we return here comes back to us in an
1578d499c80SGordon Ross  * FSCTL_OFFLOAD_READ.  We must stash whatever we'll need then in
1588d499c80SGordon Ross  * the token we return here.
1598d499c80SGordon Ross  *
1608d499c80SGordon Ross  * We want server-side copy to be able to copy "holes" efficiently,
1618d499c80SGordon Ross  * but would rather avoid the complexity of encoding a list of all
1628d499c80SGordon Ross  * allocated ranges into our returned token, so this compromise:
1638d499c80SGordon Ross  *
1648d499c80SGordon Ross  * When the current range is entirely within a "hole", we'll return
1658d499c80SGordon Ross  * the special "zeros" token, and the offload write using that token
1668d499c80SGordon Ross  * will use the simple and very efficient smb_fsop_freesp.  In this
1678d499c80SGordon Ross  * scenario, we'll have a copy stride of smb2_odx_read_max (256M).
1688d499c80SGordon Ross  *
1698d499c80SGordon Ross  * When there's any data in the range to copy, we'll return our
1708d499c80SGordon Ross  * "native" token, and the subsequent offload_write will walk the
1718d499c80SGordon Ross  * allocated ranges copying and/or zeroing as needed.  In this
1728d499c80SGordon Ross  * scenario, we'll have a copy stride of smb2_odx_write_max (16M).
1738d499c80SGordon Ross  *
1748d499c80SGordon Ross  * One additional optimization allowed by the protocol is that when
1758d499c80SGordon Ross  * we discover that there's no more data after the current range,
1768d499c80SGordon Ross  * we can set the flag ..._ALL_ZERO_BEYOND which tells that client
1778d499c80SGordon Ross  * they can stop copying here if they like.
1788d499c80SGordon Ross  */
1798d499c80SGordon Ross uint32_t
smb2_fsctl_odx_read(smb_request_t * sr,smb_fsctl_t * fsctl)1808d499c80SGordon Ross smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl)
1818d499c80SGordon Ross {
1828d499c80SGordon Ross 	smb_attr_t src_attr;
1838d499c80SGordon Ross 	smb_odx_token_t *tok = NULL;
1848d499c80SGordon Ross 	struct tok_native1 *tn1;
1858d499c80SGordon Ross 	smb_ofile_t *ofile = sr->fid_ofile;
1868d499c80SGordon Ross 	uint64_t src_size, src_rnd_size;
1878d499c80SGordon Ross 	off64_t data, hole;
1888d499c80SGordon Ross 	uint32_t in_struct_size;
1898d499c80SGordon Ross 	uint32_t in_flags;
1908d499c80SGordon Ross 	uint32_t in_ttl;
1918d499c80SGordon Ross 	uint64_t in_file_off;
1928d499c80SGordon Ross 	uint64_t in_copy_len;
1938d499c80SGordon Ross 	uint64_t out_xlen;
1948d499c80SGordon Ross 	uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16;
1958d499c80SGordon Ross 	uint32_t out_flags = 0;
1968d499c80SGordon Ross 	uint32_t status;
1978d499c80SGordon Ross 	uint32_t tok_type;
1988d499c80SGordon Ross 	int rc;
1998d499c80SGordon Ross 
2008d499c80SGordon Ross 	if (smb2_odx_enable == 0)
20108f2ce59SGordon Ross 		return (NT_STATUS_INVALID_DEVICE_REQUEST);
2028d499c80SGordon Ross 
2038d499c80SGordon Ross 	/*
2048d499c80SGordon Ross 	 * Make sure the (src) ofile granted access allows read.
2058d499c80SGordon Ross 	 * [MS-FSA] didn't mention this, so it's not clear where
2068d499c80SGordon Ross 	 * this should happen relative to other checks.  Usually
2078d499c80SGordon Ross 	 * access checks happen early.
2088d499c80SGordon Ross 	 */
2098d499c80SGordon Ross 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA);
2108d499c80SGordon Ross 	if (status != NT_STATUS_SUCCESS)
2118d499c80SGordon Ross 		return (status);
2128d499c80SGordon Ross 
2138d499c80SGordon Ross 	/*
2148d499c80SGordon Ross 	 * Decode FSCTL_OFFLOAD_READ_INPUT struct,
2158d499c80SGordon Ross 	 * and do in/out size checks.
2168d499c80SGordon Ross 	 */
2178d499c80SGordon Ross 	rc = smb_mbc_decodef(
2188d499c80SGordon Ross 	    fsctl->in_mbc, "lll4.qq",
2198d499c80SGordon Ross 	    &in_struct_size,	/* l */
2208d499c80SGordon Ross 	    &in_flags,		/* l */
2218d499c80SGordon Ross 	    &in_ttl,		/* l */
2228d499c80SGordon Ross 	    /* reserved		4. */
2238d499c80SGordon Ross 	    &in_file_off,	/* q */
2248d499c80SGordon Ross 	    &in_copy_len);	/* q */
2258d499c80SGordon Ross 	if (rc != 0)
2268d499c80SGordon Ross 		return (NT_STATUS_BUFFER_TOO_SMALL);
2278d499c80SGordon Ross 	if (fsctl->MaxOutputResp < out_struct_size)
2288d499c80SGordon Ross 		return (NT_STATUS_BUFFER_TOO_SMALL);
2298d499c80SGordon Ross 
2308d499c80SGordon Ross 	/*
2318d499c80SGordon Ross 	 * More arg checking per MS-FSA
2328d499c80SGordon Ross 	 */
2338d499c80SGordon Ross 	if ((in_file_off & OFFMASK) != 0 ||
2348d499c80SGordon Ross 	    (in_copy_len & OFFMASK) != 0)
2358d499c80SGordon Ross 		return (NT_STATUS_INVALID_PARAMETER);
2368d499c80SGordon Ross 	if (in_struct_size != 32)
2378d499c80SGordon Ross 		return (NT_STATUS_INVALID_PARAMETER);
2388d499c80SGordon Ross 	if (in_file_off > INT64_MAX ||
2398d499c80SGordon Ross 	    (in_file_off + in_copy_len) < in_file_off)
2408d499c80SGordon Ross 		return (NT_STATUS_INVALID_PARAMETER);
2418d499c80SGordon Ross 
2428d499c80SGordon Ross 	/*
2438d499c80SGordon Ross 	 * [MS-FSA] (summarizing)
2448d499c80SGordon Ross 	 * If not data stream, or if sparse, encrypted, compressed...
2458d499c80SGordon Ross 	 * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED.
2468d499c80SGordon Ross 	 *
2478d499c80SGordon Ross 	 * We'll ignore most of those except to require:
2488d499c80SGordon Ross 	 * Plain file, not a stream.
2498d499c80SGordon Ross 	 */
2508d499c80SGordon Ross 	if (!smb_node_is_file(ofile->f_node))
2518d499c80SGordon Ross 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
2528d499c80SGordon Ross 	if (SMB_IS_STREAM(ofile->f_node))
2538d499c80SGordon Ross 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
2548d499c80SGordon Ross 
2558d499c80SGordon Ross 	/*
2568d499c80SGordon Ross 	 * [MS-FSA] If Open.Stream.IsDeleted ...
2578d499c80SGordon Ross 	 */
258*f88ed57cSGordon Ross 	if (ofile->f_node->flags & NODE_FLAGS_DELETE_COMMITTED)
259*f88ed57cSGordon Ross 		return (NT_STATUS_FILE_DELETED);
2608d499c80SGordon Ross 
2618d499c80SGordon Ross 	/*
2628d499c80SGordon Ross 	 * If CopyLength == 0, "return immediately success".
2638d499c80SGordon Ross 	 */
2648d499c80SGordon Ross 	if (in_copy_len == 0) {
2658d499c80SGordon Ross 		out_xlen = 0;
2668d499c80SGordon Ross 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
2678d499c80SGordon Ross 		goto done;
2688d499c80SGordon Ross 	}
2698d499c80SGordon Ross 
2708d499c80SGordon Ross 	/*
2718d499c80SGordon Ross 	 * Check for lock conflicting with the read.
2728d499c80SGordon Ross 	 */
2738d499c80SGordon Ross 	status = smb_lock_range_access(sr, ofile->f_node,
2748d499c80SGordon Ross 	    in_file_off, in_copy_len, B_FALSE);
2758d499c80SGordon Ross 	if (status != 0)
2768d499c80SGordon Ross 		return (status); /* == FILE_LOCK_CONFLICT */
2778d499c80SGordon Ross 
2788d499c80SGordon Ross 	/*
2798d499c80SGordon Ross 	 * Get the file size (rounded to a full block)
2808d499c80SGordon Ross 	 * and check the requested offset.
2818d499c80SGordon Ross 	 */
2828d499c80SGordon Ross 	bzero(&src_attr, sizeof (src_attr));
2838d499c80SGordon Ross 	src_attr.sa_mask = SMB_AT_SIZE;
2848d499c80SGordon Ross 	status = smb2_ofile_getattr(sr, ofile, &src_attr);
2858d499c80SGordon Ross 	if (status != NT_STATUS_SUCCESS)
2868d499c80SGordon Ross 		return (status);
2878d499c80SGordon Ross 	src_size = src_attr.sa_vattr.va_size;
2888d499c80SGordon Ross 	if (in_file_off >= src_size)
2898d499c80SGordon Ross 		return (NT_STATUS_END_OF_FILE);
2908d499c80SGordon Ross 
2918d499c80SGordon Ross 	/*
2928d499c80SGordon Ross 	 * Limit the transfer length based on (rounded) EOF.
2938d499c80SGordon Ross 	 * Clients expect ranges of whole disk blocks.
2948d499c80SGordon Ross 	 * If we get a read in this rounded-up range,
2958d499c80SGordon Ross 	 * we'll supply zeros.
2968d499c80SGordon Ross 	 */
2978d499c80SGordon Ross 	src_rnd_size = (src_size + OFFMASK) & ~OFFMASK;
2988d499c80SGordon Ross 	out_xlen = in_copy_len;
2998d499c80SGordon Ross 	if ((in_file_off + out_xlen) > src_rnd_size)
3008d499c80SGordon Ross 		out_xlen = src_rnd_size - in_file_off;
3018d499c80SGordon Ross 
3028d499c80SGordon Ross 	/*
3038d499c80SGordon Ross 	 * Also, have the client come back for a new token after every
3048d499c80SGordon Ross 	 * smb2_odx_read_max bytes, so we'll have opportunities to
3058d499c80SGordon Ross 	 * recognize "holes" in the source file.
3068d499c80SGordon Ross 	 */
3078d499c80SGordon Ross 	if (out_xlen > smb2_odx_read_max)
3088d499c80SGordon Ross 		out_xlen = smb2_odx_read_max;
3098d499c80SGordon Ross 
3108d499c80SGordon Ross 	/*
3118d499c80SGordon Ross 	 * Ask the filesystem if there are any allocated regions in
3128d499c80SGordon Ross 	 * the requested range, and return either the "zeros" token
3138d499c80SGordon Ross 	 * or our "native" token as appropriate (details above).
3148d499c80SGordon Ross 	 */
3158d499c80SGordon Ross 	data = in_file_off;
3168d499c80SGordon Ross 	tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1;
3178d499c80SGordon Ross 	rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node,
3188d499c80SGordon Ross 	    &data, &hole);
3198d499c80SGordon Ross 	switch (rc) {
3208d499c80SGordon Ross 	case 0:
3218d499c80SGordon Ross 		/* Found some data.  Is it beyond this range? */
3228d499c80SGordon Ross 		if (data >= (in_file_off + out_xlen))
3238d499c80SGordon Ross 			tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
3248d499c80SGordon Ross 		break;
3258d499c80SGordon Ross 	case ENXIO:
326*f88ed57cSGordon Ross 		/*
327*f88ed57cSGordon Ross 		 * No data here to EOF.  Use TOKEN_TYPE_ZERO_DATA,
328*f88ed57cSGordon Ross 		 * but only if we're not crossing src_size, because
329*f88ed57cSGordon Ross 		 * type zero cannot preserve unaligned src_size.
330*f88ed57cSGordon Ross 		 */
331*f88ed57cSGordon Ross 		if ((in_file_off + out_xlen) <= src_size)
332*f88ed57cSGordon Ross 			tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
3338d499c80SGordon Ross 		out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND;
3348d499c80SGordon Ross 		break;
3358d499c80SGordon Ross 	case ENOSYS:	/* FS does not support VOP_IOCTL... */
3368d499c80SGordon Ross 	case ENOTTY:	/* ... or _FIO_SEEK_DATA, _HOLE */
3378d499c80SGordon Ross 		break;
3388d499c80SGordon Ross 	default:
3398d499c80SGordon Ross 		cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc);
3408d499c80SGordon Ross 		break;
3418d499c80SGordon Ross 	}
3428d499c80SGordon Ross 
3438d499c80SGordon Ross done:
3448d499c80SGordon Ross 	/* Already checked MaxOutputResp */
3458d499c80SGordon Ross 	(void) smb_mbc_encodef(
3468d499c80SGordon Ross 	    fsctl->out_mbc, "llq",
3478d499c80SGordon Ross 	    out_struct_size,	/* l */
3488d499c80SGordon Ross 	    out_flags,		/* l */
3498d499c80SGordon Ross 	    out_xlen);		/* q */
3508d499c80SGordon Ross 
3518d499c80SGordon Ross 	/*
3528d499c80SGordon Ross 	 * Build the ODX token to return
3538d499c80SGordon Ross 	 */
3548d499c80SGordon Ross 	tok = smb_srm_zalloc(sr, sizeof (*tok));
3558d499c80SGordon Ross 	tok->tok_type = tok_type;
3568d499c80SGordon Ross 	tok->tok_reserved = 0;
3578d499c80SGordon Ross 	if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) {
3588d499c80SGordon Ross 		tok->tok_len = sizeof (*tn1);
3598d499c80SGordon Ross 		tn1 = &tok->tok_u.u_tok_native1;
3608d499c80SGordon Ross 		tn1->tn1_fid.persistent = ofile->f_persistid;
3618d499c80SGordon Ross 		tn1->tn1_fid.temporal = ofile->f_fid;
3628d499c80SGordon Ross 		tn1->tn1_off = in_file_off;
3638d499c80SGordon Ross 		tn1->tn1_eof = src_size;
364ba55de6bSGordon Ross 		tn1->tn1_tid = sr->smb_tid;
3658d499c80SGordon Ross 	}
3668d499c80SGordon Ross 
3678d499c80SGordon Ross 	rc = smb_odx_put_token(fsctl->out_mbc, tok);
3688d499c80SGordon Ross 	if (rc != 0)
3698d499c80SGordon Ross 		return (NT_STATUS_BUFFER_TOO_SMALL);
3708d499c80SGordon Ross 
3718d499c80SGordon Ross 	return (NT_STATUS_SUCCESS);
3728d499c80SGordon Ross }
3738d499c80SGordon Ross 
3748d499c80SGordon Ross /*
375*f88ed57cSGordon Ross  * FSCTL_OFFLOAD_WRITE
3768d499c80SGordon Ross  * [MS-FSCC] 2.3.80
3778d499c80SGordon Ross  *
3788d499c80SGordon Ross  * Similar (in concept) to FSCTL_COPYCHUNK_WRITE
3798d499c80SGordon Ross  *
3808d499c80SGordon Ross  * Copies from a source file identified by a "token"
3818d499c80SGordon Ross  * (previously returned by FSCTL_OFFLOAD_READ)
3828d499c80SGordon Ross  * to the file on which the ioctl is issued.
3838d499c80SGordon Ross  */
3848d499c80SGordon Ross uint32_t
smb2_fsctl_odx_write(smb_request_t * sr,smb_fsctl_t * fsctl)3858d499c80SGordon Ross smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl)
3868d499c80SGordon Ross {
3878d499c80SGordon Ross 	smb_attr_t dst_attr;
3888d499c80SGordon Ross 	odx_write_args_t args;
3898d499c80SGordon Ross 	smb_odx_token_t *tok = NULL;
3908d499c80SGordon Ross 	smb_ofile_t *ofile = sr->fid_ofile;
3918d499c80SGordon Ross 	uint32_t status = NT_STATUS_INVALID_PARAMETER;
3928d499c80SGordon Ross 	int rc;
3938d499c80SGordon Ross 
3948d499c80SGordon Ross 	bzero(&args, sizeof (args));
3958d499c80SGordon Ross 	args.out_struct_size = 16;
3968d499c80SGordon Ross 
3978d499c80SGordon Ross 	if (smb2_odx_enable == 0)
39808f2ce59SGordon Ross 		return (NT_STATUS_INVALID_DEVICE_REQUEST);
3998d499c80SGordon Ross 
4008d499c80SGordon Ross 	/*
4018d499c80SGordon Ross 	 * Make sure the (dst) ofile granted_access allows write.
4028d499c80SGordon Ross 	 * [MS-FSA] didn't mention this, so it's not clear where
4038d499c80SGordon Ross 	 * this should happen relative to other checks.  Usually
4048d499c80SGordon Ross 	 * access checks happen early.
4058d499c80SGordon Ross 	 */
4068d499c80SGordon Ross 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA);
4078d499c80SGordon Ross 	if (status != NT_STATUS_SUCCESS)
4088d499c80SGordon Ross 		return (status);
4098d499c80SGordon Ross 
4108d499c80SGordon Ross 	/*
4118d499c80SGordon Ross 	 * Decode FSCTL_OFFLOAD_WRITE_INPUT struct,
4128d499c80SGordon Ross 	 * and do in/out size checks.
4138d499c80SGordon Ross 	 */
4148d499c80SGordon Ross 	rc = smb_mbc_decodef(
4158d499c80SGordon Ross 	    fsctl->in_mbc, "llqqq",
4168d499c80SGordon Ross 	    &args.in_struct_size,	/* l */
4178d499c80SGordon Ross 	    &args.in_flags,		/* l */
4188d499c80SGordon Ross 	    &args.in_dstoff,		/* q */
4198d499c80SGordon Ross 	    &args.in_xlen,		/* q */
4208d499c80SGordon Ross 	    &args.in_xoff);		/* q */
4218d499c80SGordon Ross 	if (rc != 0)
4228d499c80SGordon Ross 		return (NT_STATUS_BUFFER_TOO_SMALL);
4238d499c80SGordon Ross 	tok = smb_srm_zalloc(sr, sizeof (*tok));
4248d499c80SGordon Ross 	rc = smb_odx_get_token(fsctl->in_mbc, tok);
4258d499c80SGordon Ross 	if (rc != 0)
4268d499c80SGordon Ross 		return (NT_STATUS_BUFFER_TOO_SMALL);
4278d499c80SGordon Ross 	if (fsctl->MaxOutputResp < args.out_struct_size)
4288d499c80SGordon Ross 		return (NT_STATUS_BUFFER_TOO_SMALL);
4298d499c80SGordon Ross 
4308d499c80SGordon Ross 	/*
4318d499c80SGordon Ross 	 * More arg checking per MS-FSA
4328d499c80SGordon Ross 	 */
4338d499c80SGordon Ross 	if ((args.in_dstoff & OFFMASK) != 0 ||
4348d499c80SGordon Ross 	    (args.in_xoff & OFFMASK) != 0 ||
4358d499c80SGordon Ross 	    (args.in_xlen & OFFMASK) != 0)
4368d499c80SGordon Ross 		return (NT_STATUS_INVALID_PARAMETER);
4378d499c80SGordon Ross 	if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32))
4388d499c80SGordon Ross 		return (NT_STATUS_INVALID_PARAMETER);
4398d499c80SGordon Ross 	if (args.in_dstoff > INT64_MAX ||
4408d499c80SGordon Ross 	    (args.in_dstoff + args.in_xlen) < args.in_dstoff)
4418d499c80SGordon Ross 		return (NT_STATUS_INVALID_PARAMETER);
4428d499c80SGordon Ross 
4438d499c80SGordon Ross 	/*
4448d499c80SGordon Ross 	 * If CopyLength == 0, "return immediately success".
4458d499c80SGordon Ross 	 */
4468d499c80SGordon Ross 	if (args.in_xlen == 0) {
4478d499c80SGordon Ross 		status = 0;
4488d499c80SGordon Ross 		goto done;
4498d499c80SGordon Ross 	}
4508d499c80SGordon Ross 
4518d499c80SGordon Ross 	/*
4528d499c80SGordon Ross 	 * [MS-FSA] (summarizing)
4538d499c80SGordon Ross 	 * If not data stream, or if sparse, encrypted, compressed...
4548d499c80SGordon Ross 	 * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED.
4558d499c80SGordon Ross 	 *
4568d499c80SGordon Ross 	 * We'll ignore most of those except to require:
4578d499c80SGordon Ross 	 * Plain file, not a stream.
4588d499c80SGordon Ross 	 */
4598d499c80SGordon Ross 	if (!smb_node_is_file(ofile->f_node))
4608d499c80SGordon Ross 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
4618d499c80SGordon Ross 	if (SMB_IS_STREAM(ofile->f_node))
4628d499c80SGordon Ross 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
4638d499c80SGordon Ross 
4648d499c80SGordon Ross 	/*
4658d499c80SGordon Ross 	 * [MS-FSA] If Open.Stream.IsDeleted ...
4668d499c80SGordon Ross 	 */
467*f88ed57cSGordon Ross 	if (ofile->f_node->flags & NODE_FLAGS_DELETE_COMMITTED)
468*f88ed57cSGordon Ross 		return (NT_STATUS_FILE_DELETED);
4698d499c80SGordon Ross 
4708d499c80SGordon Ross 	/*
4718d499c80SGordon Ross 	 * Check for lock conflicting with the write.
4728d499c80SGordon Ross 	 */
4738d499c80SGordon Ross 	status = smb_lock_range_access(sr, ofile->f_node,
4748d499c80SGordon Ross 	    args.in_dstoff, args.in_xlen, B_TRUE);
4758d499c80SGordon Ross 	if (status != 0)
4768d499c80SGordon Ross 		return (status); /* == FILE_LOCK_CONFLICT */
4778d499c80SGordon Ross 
4788d499c80SGordon Ross 	/*
4798d499c80SGordon Ross 	 * Need the file size
4808d499c80SGordon Ross 	 */
4818d499c80SGordon Ross 	bzero(&dst_attr, sizeof (dst_attr));
4828d499c80SGordon Ross 	dst_attr.sa_mask = SMB_AT_SIZE;
4838d499c80SGordon Ross 	status = smb2_ofile_getattr(sr, ofile, &dst_attr);
4848d499c80SGordon Ross 	if (status != NT_STATUS_SUCCESS)
4858d499c80SGordon Ross 		return (status);
4868d499c80SGordon Ross 	args.wa_eof = dst_attr.sa_vattr.va_size;
4878d499c80SGordon Ross 
4888d499c80SGordon Ross 	/*
4898d499c80SGordon Ross 	 * Destination offset vs. EOF
4908d499c80SGordon Ross 	 */
491*f88ed57cSGordon Ross 	if (args.in_dstoff > args.wa_eof)
4928d499c80SGordon Ross 		return (NT_STATUS_END_OF_FILE);
4938d499c80SGordon Ross 
4948d499c80SGordon Ross 	/*
4958d499c80SGordon Ross 	 * Finally, run the I/O
4968d499c80SGordon Ross 	 */
4978d499c80SGordon Ross 	switch (tok->tok_type) {
4988d499c80SGordon Ross 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
4998d499c80SGordon Ross 		status = smb2_fsctl_odx_write_zeros(sr, &args);
5008d499c80SGordon Ross 		break;
5018d499c80SGordon Ross 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
5028d499c80SGordon Ross 		status = smb2_fsctl_odx_write_native1(sr, &args, tok);
5038d499c80SGordon Ross 		break;
5048d499c80SGordon Ross 	default:
5058d499c80SGordon Ross 		status = NT_STATUS_INVALID_TOKEN;
5068d499c80SGordon Ross 		break;
5078d499c80SGordon Ross 	}
5088d499c80SGordon Ross 
5098d499c80SGordon Ross done:
5108d499c80SGordon Ross 	/*
5118d499c80SGordon Ross 	 * Checked MaxOutputResp above, so we can ignore errors
5128d499c80SGordon Ross 	 * from mbc_encodef here.
5138d499c80SGordon Ross 	 */
5148d499c80SGordon Ross 	if (status == NT_STATUS_SUCCESS) {
5158d499c80SGordon Ross 		(void) smb_mbc_encodef(
5168d499c80SGordon Ross 		    fsctl->out_mbc, "llq",
5178d499c80SGordon Ross 		    args.out_struct_size,
5188d499c80SGordon Ross 		    args.out_flags,
5198d499c80SGordon Ross 		    args.out_xlen);
5208d499c80SGordon Ross 	}
5218d499c80SGordon Ross 
5228d499c80SGordon Ross 	return (status);
5238d499c80SGordon Ross }
5248d499c80SGordon Ross 
5258d499c80SGordon Ross /*
5268d499c80SGordon Ross  * Handle FSCTL_OFFLOAD_WRITE with token type
5278d499c80SGordon Ross  * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA
5288d499c80SGordon Ross  *
529*f88ed57cSGordon Ross  * In this handler, the "token" represents a source of zeros,
530*f88ed57cSGordon Ross  * limited to the range: in_dstoff to (in_dstoff + in_xlen)
531*f88ed57cSGordon Ross  *
532*f88ed57cSGordon Ross  * ODX write handlers are allowed to return any transfer amount
533*f88ed57cSGordon Ross  * less than or equal to the requested size.  We want to limit
534*f88ed57cSGordon Ross  * the amount of I/O "work" we do per ODX write call.  Here,
535*f88ed57cSGordon Ross  * we're only doing meta-data operations, so we'll allow up to
536*f88ed57cSGordon Ross  * up to smb2_odx_read_max (256M) per call.
537*f88ed57cSGordon Ross  *
538*f88ed57cSGordon Ross  * The I/O "work" done by this function is to make zeros appear
539*f88ed57cSGordon Ross  * in the file in the range: in_dstoff, (in_dstoff + in_xlen).
540*f88ed57cSGordon Ross  * Rather than actually write zeros, we'll use VOP_SPACE to
541*f88ed57cSGordon Ross  * make "holes" in the file.  If any of the range we're asked
542*f88ed57cSGordon Ross  * to zero out is beyond the destination EOF, we can simply
543*f88ed57cSGordon Ross  * extend the file length (zeros will appear).
544*f88ed57cSGordon Ross  *
545*f88ed57cSGordon Ross  * The caller has verified block alignement of:
546*f88ed57cSGordon Ross  * args->in_dstoff, args->in_xoff, args->in_xlen
5478d499c80SGordon Ross  */
5488d499c80SGordon Ross static uint32_t
smb2_fsctl_odx_write_zeros(smb_request_t * sr,odx_write_args_t * args)5498d499c80SGordon Ross smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args)
5508d499c80SGordon Ross {
5518d499c80SGordon Ross 	smb_ofile_t *dst_ofile = sr->fid_ofile;
552*f88ed57cSGordon Ross 	uint64_t xlen;
5538d499c80SGordon Ross 	int rc;
5548d499c80SGordon Ross 
5558d499c80SGordon Ross 	ASSERT(args->in_xlen > 0);
556*f88ed57cSGordon Ross 	args->out_xlen = 0;
5578d499c80SGordon Ross 
5588d499c80SGordon Ross 	/*
559*f88ed57cSGordon Ross 	 * Limit the I/O size. (per above)
5608d499c80SGordon Ross 	 */
561*f88ed57cSGordon Ross 	if (args->in_xlen > smb2_odx_read_max)
562*f88ed57cSGordon Ross 		args->in_xlen = smb2_odx_read_max;
5638d499c80SGordon Ross 
5648d499c80SGordon Ross 	/*
565*f88ed57cSGordon Ross 	 * Handle the part below destination EOF.
566*f88ed57cSGordon Ross 	 * (in_dstoff to wa_eof).
5678d499c80SGordon Ross 	 */
568*f88ed57cSGordon Ross 	if (args->in_dstoff < args->wa_eof) {
569*f88ed57cSGordon Ross 		xlen = args->in_xlen;
570*f88ed57cSGordon Ross 		if ((args->in_dstoff + xlen) > args->wa_eof) {
571*f88ed57cSGordon Ross 			xlen = args->wa_eof - args->in_dstoff;
572*f88ed57cSGordon Ross 			ASSERT(xlen < args->in_xlen);
573*f88ed57cSGordon Ross 		}
574*f88ed57cSGordon Ross 		rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
575*f88ed57cSGordon Ross 		    args->in_dstoff, xlen);
576*f88ed57cSGordon Ross 		if (rc != 0) {
577*f88ed57cSGordon Ross 			/* Let client fall-back to normal copy. */
578*f88ed57cSGordon Ross 			return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
579*f88ed57cSGordon Ross 		}
580*f88ed57cSGordon Ross 	}
5818d499c80SGordon Ross 
5828d499c80SGordon Ross 	/*
583*f88ed57cSGordon Ross 	 * Now the part after destination EOF, if any.
584*f88ed57cSGordon Ross 	 * Just set the file size.
585*f88ed57cSGordon Ross 	 */
586*f88ed57cSGordon Ross 	if ((args->in_dstoff + args->in_xlen) > args->wa_eof) {
587*f88ed57cSGordon Ross 		smb_attr_t attr;
588*f88ed57cSGordon Ross 
589*f88ed57cSGordon Ross 		bzero(&attr, sizeof (smb_attr_t));
590*f88ed57cSGordon Ross 		attr.sa_mask = SMB_AT_SIZE;
591*f88ed57cSGordon Ross 		attr.sa_vattr.va_size = args->in_dstoff + args->in_xlen;
592*f88ed57cSGordon Ross 
593*f88ed57cSGordon Ross 		rc = smb_node_setattr(sr, dst_ofile->f_node,
594*f88ed57cSGordon Ross 		    dst_ofile->f_cr, dst_ofile, &attr);
595*f88ed57cSGordon Ross 		if (rc != 0) {
596*f88ed57cSGordon Ross 			return (smb_errno2status(rc));
597*f88ed57cSGordon Ross 		}
5988d499c80SGordon Ross 	}
5998d499c80SGordon Ross 
600*f88ed57cSGordon Ross 	args->out_xlen = args->in_xlen;
601*f88ed57cSGordon Ross 
602*f88ed57cSGordon Ross 	return (0);
6038d499c80SGordon Ross }
6048d499c80SGordon Ross 
6058d499c80SGordon Ross /*
6068d499c80SGordon Ross  * Handle FSCTL_OFFLOAD_WRITE with token type
6078d499c80SGordon Ross  * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1
608*f88ed57cSGordon Ross  *
609*f88ed57cSGordon Ross  * For this handler, the token represents a valid range in the
610*f88ed57cSGordon Ross  * source file (tn1_off to tn1_eof).  The token contains enough
611*f88ed57cSGordon Ross  * information for us to find the tree and file handle that the
612*f88ed57cSGordon Ross  * client has open on the source file for this copy.
613*f88ed57cSGordon Ross  *
614*f88ed57cSGordon Ross  * ODX write handlers are allowed to return any transfer amount
615*f88ed57cSGordon Ross  * less than or equal to the requested size.  We want to limit
616*f88ed57cSGordon Ross  * the amount of I/O "work" we do per ODX write call.  Here,
617*f88ed57cSGordon Ross  * we're actually copying from another file, so limit transfers
618*f88ed57cSGordon Ross  * to smb2_odx_write_max (16M) per call.
619*f88ed57cSGordon Ross  *
620*f88ed57cSGordon Ross  * Copying past un-aligned end of source file:
621*f88ed57cSGordon Ross  *
622*f88ed57cSGordon Ross  * The MS-FSA spec. is silent about copying when the file length is
623*f88ed57cSGordon Ross  * not block aligned. Clients normally request copying a range that's
624*f88ed57cSGordon Ross  * the file size rounded up to a block boundary, and expect that copy
625*f88ed57cSGordon Ross  * to extend the destination as long as the copy has not crossed the
626*f88ed57cSGordon Ross  * EOF in the source file.  This means that the last block we copy
627*f88ed57cSGordon Ross  * will generally be a partial copy, where the first part comes from
628*f88ed57cSGordon Ross  * the source file, and the remainider is either zeros or truncated.
629*f88ed57cSGordon Ross  *
630*f88ed57cSGordon Ross  * Extending the destination file:
631*f88ed57cSGordon Ross  *
632*f88ed57cSGordon Ross  * With a whole file copy, we want the destination file length to
633*f88ed57cSGordon Ross  * match the source file length, even if it's not block aligned.
634*f88ed57cSGordon Ross  * We could just never extend the destination file, but there are
635*f88ed57cSGordon Ross  * WPTS tests that prove that ODX write IS supposed to extend the
636*f88ed57cSGordon Ross  * destination file when appropriate.  This is solved by having
637*f88ed57cSGordon Ross  * this write handler extend the destination file as long as the
638*f88ed57cSGordon Ross  * copy has not yet crossed EOF in the source file.  After we've
639*f88ed57cSGordon Ross  * past the source EOF with copying, we'll zero out the remainder
640*f88ed57cSGordon Ross  * of the block in which the copy stopped, stopping at either the
641*f88ed57cSGordon Ross  * end of the block or the end of the destination file, whichever
642*f88ed57cSGordon Ross  * comes first.  This guarantees that a future read anywhere in
643*f88ed57cSGordon Ross  * that range will see either data from the source file or zeros.
644*f88ed57cSGordon Ross  *
645*f88ed57cSGordon Ross  * Note that no matter which way we stopped copying, we MUST
646*f88ed57cSGordon Ross  * return a block-aligned transfer size in our response.
647*f88ed57cSGordon Ross  * The caller has verified block alignement of:
648*f88ed57cSGordon Ross  * args->in_dstoff, args->in_xoff, args->in_xlen
6498d499c80SGordon Ross  */
6508d499c80SGordon Ross static uint32_t
smb2_fsctl_odx_write_native1(smb_request_t * sr,odx_write_args_t * args,smb_odx_token_t * tok)6518d499c80SGordon Ross smb2_fsctl_odx_write_native1(smb_request_t *sr,
6528d499c80SGordon Ross     odx_write_args_t *args, smb_odx_token_t *tok)
6538d499c80SGordon Ross {
6548d499c80SGordon Ross 	struct tok_native1 *tn1;
6558d499c80SGordon Ross 	smb_ofile_t *dst_ofile = sr->fid_ofile;
6568d499c80SGordon Ross 	smb_ofile_t *src_ofile = NULL;
6578d499c80SGordon Ross 	void *buffer = NULL;
6588d499c80SGordon Ross 	size_t bufsize = smb2_odx_buf_size;
6598d499c80SGordon Ross 	uint64_t src_offset;
6608d499c80SGordon Ross 	uint32_t resid;
6618d499c80SGordon Ross 	uint32_t xlen;
6628d499c80SGordon Ross 	uint32_t status;
6638d499c80SGordon Ross 
664*f88ed57cSGordon Ross 	ASSERT(args->in_xlen > 0);
665*f88ed57cSGordon Ross 	args->out_xlen = 0;
666*f88ed57cSGordon Ross 
6678d499c80SGordon Ross 	/*
668*f88ed57cSGordon Ross 	 * Limit the I/O size. (per above)
669*f88ed57cSGordon Ross 	 */
670*f88ed57cSGordon Ross 	if (args->in_xlen > smb2_odx_write_max)
671*f88ed57cSGordon Ross 		args->in_xlen = smb2_odx_write_max;
672*f88ed57cSGordon Ross 
673*f88ed57cSGordon Ross 	/*
674*f88ed57cSGordon Ross 	 * Lookup the source ofile using the "token".
6758d499c80SGordon Ross 	 */
6768d499c80SGordon Ross 	tn1 = &tok->tok_u.u_tok_native1;
677ba55de6bSGordon Ross 
678ba55de6bSGordon Ross 	/*
679ba55de6bSGordon Ross 	 * If the source ofile came from another tree, we need to
680ba55de6bSGordon Ross 	 * get the other tree and use it for the fid lookup.
681ba55de6bSGordon Ross 	 * Do that by temporarily changing sr->tid_tree around
682ba55de6bSGordon Ross 	 * the call to smb_ofile_lookup_by_fid().
683ba55de6bSGordon Ross 	 */
684ba55de6bSGordon Ross 	if (tn1->tn1_tid != sr->smb_tid) {
685ba55de6bSGordon Ross 		smb_tree_t *saved_tree;
686ba55de6bSGordon Ross 		smb_tree_t *src_tree;
687ba55de6bSGordon Ross 
688ba55de6bSGordon Ross 		src_tree = smb_session_lookup_tree(sr->session,
689ba55de6bSGordon Ross 		    (uint16_t)tn1->tn1_tid);
690ba55de6bSGordon Ross 		if (src_tree == NULL) {
691ba55de6bSGordon Ross 			status = NT_STATUS_INVALID_TOKEN;
692ba55de6bSGordon Ross 			goto out;
693ba55de6bSGordon Ross 		}
694ba55de6bSGordon Ross 
695ba55de6bSGordon Ross 		saved_tree = sr->tid_tree;
696ba55de6bSGordon Ross 		sr->tid_tree = src_tree;
697ba55de6bSGordon Ross 
698ba55de6bSGordon Ross 		src_ofile = smb_ofile_lookup_by_fid(sr,
699ba55de6bSGordon Ross 		    (uint16_t)tn1->tn1_fid.temporal);
700ba55de6bSGordon Ross 
701ba55de6bSGordon Ross 		sr->tid_tree = saved_tree;
702ba55de6bSGordon Ross 		smb_tree_release(src_tree);
703ba55de6bSGordon Ross 	} else {
704ba55de6bSGordon Ross 		src_ofile = smb_ofile_lookup_by_fid(sr,
705ba55de6bSGordon Ross 		    (uint16_t)tn1->tn1_fid.temporal);
706ba55de6bSGordon Ross 	}
707ba55de6bSGordon Ross 
7088d499c80SGordon Ross 	if (src_ofile == NULL ||
7098d499c80SGordon Ross 	    src_ofile->f_persistid != tn1->tn1_fid.persistent) {
7108d499c80SGordon Ross 		status = NT_STATUS_INVALID_TOKEN;
7118d499c80SGordon Ross 		goto out;
7128d499c80SGordon Ross 	}
7138d499c80SGordon Ross 
7148d499c80SGordon Ross 	/*
7158d499c80SGordon Ross 	 * Make sure src_ofile is open on a regular file, and
716*f88ed57cSGordon Ross 	 * granted access includes READ_DATA.  These were all
717*f88ed57cSGordon Ross 	 * validated in ODX READ, so if these checks fail it
718*f88ed57cSGordon Ross 	 * means somebody messed with the token or something.
7198d499c80SGordon Ross 	 */
7208d499c80SGordon Ross 	if (!smb_node_is_file(src_ofile->f_node)) {
7218d499c80SGordon Ross 		status = NT_STATUS_ACCESS_DENIED;
7228d499c80SGordon Ross 		goto out;
7238d499c80SGordon Ross 	}
7248d499c80SGordon Ross 	status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA);
7258d499c80SGordon Ross 	if (status != NT_STATUS_SUCCESS)
7268d499c80SGordon Ross 		goto out;
7278d499c80SGordon Ross 
7288d499c80SGordon Ross 	/*
729*f88ed57cSGordon Ross 	 * Get a buffer used for copying, always smb2_odx_buf_size
730*f88ed57cSGordon Ross 	 *
731*f88ed57cSGordon Ross 	 * Rather than sleep for this relatively large allocation,
732*f88ed57cSGordon Ross 	 * allow the allocation to fail and return an error.
733*f88ed57cSGordon Ross 	 * The client should then fall back to normal copy.
7348d499c80SGordon Ross 	 */
735*f88ed57cSGordon Ross 	buffer = kmem_alloc(bufsize, KM_NOSLEEP_LAZY);
736*f88ed57cSGordon Ross 	if (buffer == NULL) {
737*f88ed57cSGordon Ross 		status = NT_STATUS_INSUFF_SERVER_RESOURCES;
738*f88ed57cSGordon Ross 		goto out;
739*f88ed57cSGordon Ross 	}
7408d499c80SGordon Ross 
7418d499c80SGordon Ross 	/*
7428d499c80SGordon Ross 	 * Note: in_xoff is relative to the beginning of the "token"
7438d499c80SGordon Ross 	 * (a range of the source file tn1_off, tn1_eof).  Make sure
7448d499c80SGordon Ross 	 * in_xoff is within the range represented by this token.
7458d499c80SGordon Ross 	 */
7468d499c80SGordon Ross 	src_offset = tn1->tn1_off + args->in_xoff;
7478d499c80SGordon Ross 	if (src_offset >= tn1->tn1_eof ||
7488d499c80SGordon Ross 	    src_offset < tn1->tn1_off) {
7498d499c80SGordon Ross 		status = NT_STATUS_INVALID_PARAMETER;
7508d499c80SGordon Ross 		goto out;
7518d499c80SGordon Ross 	}
7528d499c80SGordon Ross 
7538d499c80SGordon Ross 	/*
754*f88ed57cSGordon Ross 	 * Source offset+len vs. source EOF (see top comment)
7558d499c80SGordon Ross 	 */
756*f88ed57cSGordon Ross 	xlen = (uint32_t)args->in_xlen;
757*f88ed57cSGordon Ross 	if ((src_offset + xlen) > tn1->tn1_eof) {
758*f88ed57cSGordon Ross 		/*
759*f88ed57cSGordon Ross 		 * Copying would pass tn1_eof.  Reduce xlen.
760*f88ed57cSGordon Ross 		 */
761*f88ed57cSGordon Ross 		DTRACE_PROBE3(crossed__eof, smb_request_t *, sr,
762*f88ed57cSGordon Ross 		    odx_write_args_t *, args, smb_odx_token_t *, tok);
763*f88ed57cSGordon Ross 		xlen = (uint32_t)(tn1->tn1_eof - src_offset);
7648d499c80SGordon Ross 	}
7658d499c80SGordon Ross 
7668d499c80SGordon Ross 	/*
767*f88ed57cSGordon Ross 	 * Copy src to dst for xlen.  This MAY extend the dest file.
768*f88ed57cSGordon Ross 	 * Note: xlen may be not block-aligned now.  Handled below.
7698d499c80SGordon Ross 	 */
7708d499c80SGordon Ross 	resid = xlen;
7718d499c80SGordon Ross 	status = smb2_sparse_copy(sr, src_ofile, dst_ofile,
7728d499c80SGordon Ross 	    src_offset, args->in_dstoff, &resid, buffer, bufsize);
7738d499c80SGordon Ross 
7748d499c80SGordon Ross 	/*
775*f88ed57cSGordon Ross 	 * If the result was a partial copy, round down the reported
776*f88ed57cSGordon Ross 	 * transfer size to a block boundary. If we moved any data,
777*f88ed57cSGordon Ross 	 * suppress errors on this call.  If an error was suppressed,
778*f88ed57cSGordon Ross 	 * it will happen again and be returned on the next call.
7798d499c80SGordon Ross 	 */
780*f88ed57cSGordon Ross 	if (status != 0 || resid != 0) {
7818d499c80SGordon Ross 		xlen -= resid;
7828d499c80SGordon Ross 		xlen &= ~OFFMASK;
7838d499c80SGordon Ross 		args->out_xlen = xlen;
784*f88ed57cSGordon Ross 		/* If we moved any data, suppress errors. */
785*f88ed57cSGordon Ross 		if (xlen > 0)
786*f88ed57cSGordon Ross 			status = 0;
787*f88ed57cSGordon Ross 		goto out;
7888d499c80SGordon Ross 	}
7898d499c80SGordon Ross 
7908d499c80SGordon Ross 	/*
791*f88ed57cSGordon Ross 	 * If the copying covered the whole in_xlen, we're done.
792*f88ed57cSGordon Ross 	 * The test is >= here just so we can guarantee < below.
7938d499c80SGordon Ross 	 */
794*f88ed57cSGordon Ross 	if (xlen >= args->in_xlen) {
795*f88ed57cSGordon Ross 		args->out_xlen = args->in_xlen;
796*f88ed57cSGordon Ross 		goto out;
797*f88ed57cSGordon Ross 	}
798*f88ed57cSGordon Ross 
799*f88ed57cSGordon Ross 	/*
800*f88ed57cSGordon Ross 	 * Have: xlen < args->in_xlen
801*f88ed57cSGordon Ross 	 *
802*f88ed57cSGordon Ross 	 * Here we know xlen was reduced because the copy
803*f88ed57cSGordon Ross 	 * crossed the source EOF.  See top comment.
804*f88ed57cSGordon Ross 	 * Set the rounded-up transfer size now, and
805*f88ed57cSGordon Ross 	 * deal with the remainder of the last block.
806*f88ed57cSGordon Ross 	 */
807*f88ed57cSGordon Ross 	args->out_xlen = (xlen + OFFMASK) & ~OFFMASK;
808*f88ed57cSGordon Ross 
809*f88ed57cSGordon Ross 	/*
810*f88ed57cSGordon Ross 	 * If smb2_sparse_copy passed wa_eof, that means we've
811*f88ed57cSGordon Ross 	 * extended the file, so the remainder of the last block
812*f88ed57cSGordon Ross 	 * written is beyond the destination EOF was, so there's
813*f88ed57cSGordon Ross 	 * no need to zero out the remainder. "We're done".
814*f88ed57cSGordon Ross 	 */
815*f88ed57cSGordon Ross 	args->in_dstoff += xlen;
816*f88ed57cSGordon Ross 	if (args->in_dstoff >= args->wa_eof)
817*f88ed57cSGordon Ross 		goto out;
818*f88ed57cSGordon Ross 
819*f88ed57cSGordon Ross 	/*
820*f88ed57cSGordon Ross 	 * Have: in_dstoff < wa_eof
821*f88ed57cSGordon Ross 	 *
822*f88ed57cSGordon Ross 	 * Zero out the unwritten part of the last block that
823*f88ed57cSGordon Ross 	 * falls before the destination EOF. (Not extending.)
824*f88ed57cSGordon Ross 	 * Here, resid is the length of the part we'll zero.
825*f88ed57cSGordon Ross 	 */
826*f88ed57cSGordon Ross 	resid = args->out_xlen - xlen;
827*f88ed57cSGordon Ross 	if ((args->in_dstoff + resid) > args->wa_eof)
828*f88ed57cSGordon Ross 		resid = args->wa_eof - args->in_dstoff;
829*f88ed57cSGordon Ross 	if (resid > 0) {
830*f88ed57cSGordon Ross 		int rc;
831*f88ed57cSGordon Ross 		/*
832*f88ed57cSGordon Ross 		 * Zero out in_dstoff to wa_eof.
833*f88ed57cSGordon Ross 		 */
834*f88ed57cSGordon Ross 		rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
835*f88ed57cSGordon Ross 		    args->in_dstoff, resid);
836*f88ed57cSGordon Ross 		if (rc != 0) {
837*f88ed57cSGordon Ross 			status = smb_errno2status(rc);
838*f88ed57cSGordon Ross 		}
839*f88ed57cSGordon Ross 	}
8408d499c80SGordon Ross 
8418d499c80SGordon Ross out:
8428d499c80SGordon Ross 	if (src_ofile != NULL)
8438d499c80SGordon Ross 		smb_ofile_release(src_ofile);
8448d499c80SGordon Ross 
8458d499c80SGordon Ross 	if (buffer != NULL)
8468d499c80SGordon Ross 		kmem_free(buffer, bufsize);
8478d499c80SGordon Ross 
8488d499c80SGordon Ross 	return (status);
8498d499c80SGordon Ross }
8508d499c80SGordon Ross 
8518d499c80SGordon Ross /*
8528d499c80SGordon Ross  * Get an smb_odx_token_t from the (input) mbuf chain.
8538d499c80SGordon Ross  * Consumes exactly TOKEN_TOTAL_SIZE bytes.
8548d499c80SGordon Ross  */
8558d499c80SGordon Ross static int
smb_odx_get_token(mbuf_chain_t * mbc,smb_odx_token_t * tok)8568d499c80SGordon Ross smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
8578d499c80SGordon Ross {
8588d499c80SGordon Ross 	mbuf_chain_t tok_mbc;
8598d499c80SGordon Ross 	int start_pos = mbc->chain_offset;
8608d499c80SGordon Ross 	int rc;
8618d499c80SGordon Ross 
8628d499c80SGordon Ross 	if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0)
8638d499c80SGordon Ross 		return (-1);
8648d499c80SGordon Ross 
8658d499c80SGordon Ross 	/*
8668d499c80SGordon Ross 	 * No big-endian support in smb_mbc_encodef, so swap
8678d499c80SGordon Ross 	 * the big-endian fields: tok_type (32-bits),
8688d499c80SGordon Ross 	 * (reserved is 16-bit zero, so no swap),
8698d499c80SGordon Ross 	 * and tok_len (16-bits)
8708d499c80SGordon Ross 	 */
8718d499c80SGordon Ross 	rc = smb_mbc_decodef(
8728d499c80SGordon Ross 	    mbc, "l..w",
8738d499c80SGordon Ross 	    &tok->tok_type,
8748d499c80SGordon Ross 	    /* tok_reserved */
8758d499c80SGordon Ross 	    &tok->tok_len);
8768d499c80SGordon Ross 	if (rc != 0)
8778d499c80SGordon Ross 		return (rc);
8788d499c80SGordon Ross 	tok->tok_type = BSWAP_32(tok->tok_type);
8798d499c80SGordon Ross 	tok->tok_len = BSWAP_16(tok->tok_len);
8808d499c80SGordon Ross 
8818d499c80SGordon Ross 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
8828d499c80SGordon Ross 		return (-1);
8838d499c80SGordon Ross 	rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc,
8848d499c80SGordon Ross 	    mbc->chain_offset, tok->tok_len);
8858d499c80SGordon Ross 	if (rc != 0)
8868d499c80SGordon Ross 		return (rc);
8878d499c80SGordon Ross 
8888d499c80SGordon Ross 	switch (tok->tok_type) {
8898d499c80SGordon Ross 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
8908d499c80SGordon Ross 		/* no payload */
8918d499c80SGordon Ross 		break;
8928d499c80SGordon Ross 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
8938d499c80SGordon Ross 		rc = smb_odx_get_token_native1(&tok_mbc,
8948d499c80SGordon Ross 		    &tok->tok_u.u_tok_native1);
8958d499c80SGordon Ross 		break;
8968d499c80SGordon Ross 	default:
8978d499c80SGordon Ross 		/* caller will error out */
8988d499c80SGordon Ross 		break;
8998d499c80SGordon Ross 	}
9008d499c80SGordon Ross 
9018d499c80SGordon Ross 	if (rc == 0) {
9028d499c80SGordon Ross 		/* Advance past what we shadowed. */
9038d499c80SGordon Ross 		mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE;
9048d499c80SGordon Ross 	}
9058d499c80SGordon Ross 
9068d499c80SGordon Ross 	return (rc);
9078d499c80SGordon Ross }
9088d499c80SGordon Ross 
9098d499c80SGordon Ross static int
smb_odx_get_token_native1(mbuf_chain_t * mbc,struct tok_native1 * tn1)9108d499c80SGordon Ross smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
9118d499c80SGordon Ross {
9128d499c80SGordon Ross 	int rc;
9138d499c80SGordon Ross 
9148d499c80SGordon Ross 	rc = smb_mbc_decodef(
915ba55de6bSGordon Ross 	    mbc, "qqqql",
9168d499c80SGordon Ross 	    &tn1->tn1_fid.persistent,
9178d499c80SGordon Ross 	    &tn1->tn1_fid.temporal,
9188d499c80SGordon Ross 	    &tn1->tn1_off,
919ba55de6bSGordon Ross 	    &tn1->tn1_eof,
920ba55de6bSGordon Ross 	    &tn1->tn1_tid);
9218d499c80SGordon Ross 
9228d499c80SGordon Ross 	return (rc);
9238d499c80SGordon Ross }
9248d499c80SGordon Ross 
9258d499c80SGordon Ross /*
9268d499c80SGordon Ross  * Put an smb_odx_token_t into the (output) mbuf chain,
9278d499c80SGordon Ross  * padded to TOKEN_TOTAL_SIZE bytes.
9288d499c80SGordon Ross  */
9298d499c80SGordon Ross static int
smb_odx_put_token(mbuf_chain_t * mbc,smb_odx_token_t * tok)9308d499c80SGordon Ross smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
9318d499c80SGordon Ross {
9328d499c80SGordon Ross 	int rc, padlen;
9338d499c80SGordon Ross 	int start_pos = mbc->chain_offset;
9348d499c80SGordon Ross 	int end_pos = start_pos + TOKEN_TOTAL_SIZE;
9358d499c80SGordon Ross 
9368d499c80SGordon Ross 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
9378d499c80SGordon Ross 		return (-1);
9388d499c80SGordon Ross 
9398d499c80SGordon Ross 	/*
9408d499c80SGordon Ross 	 * No big-endian support in smb_mbc_encodef, so swap
9418d499c80SGordon Ross 	 * the big-endian fields: tok_type (32-bits),
9428d499c80SGordon Ross 	 * (reserved is 16-bit zero, so no swap),
9438d499c80SGordon Ross 	 * and tok_len (16-bits)
9448d499c80SGordon Ross 	 */
9458d499c80SGordon Ross 	rc = smb_mbc_encodef(
9468d499c80SGordon Ross 	    mbc, "lww",
9478d499c80SGordon Ross 	    BSWAP_32(tok->tok_type),
9488d499c80SGordon Ross 	    0, /* tok_reserved */
9498d499c80SGordon Ross 	    BSWAP_16(tok->tok_len));
9508d499c80SGordon Ross 	if (rc != 0)
9518d499c80SGordon Ross 		return (rc);
9528d499c80SGordon Ross 
9538d499c80SGordon Ross 	switch (tok->tok_type) {
9548d499c80SGordon Ross 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
9558d499c80SGordon Ross 		/* no payload */
9568d499c80SGordon Ross 		break;
9578d499c80SGordon Ross 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
9588d499c80SGordon Ross 		rc = smb_odx_put_token_native1(mbc,
9598d499c80SGordon Ross 		    &tok->tok_u.u_tok_native1);
9608d499c80SGordon Ross 		break;
9618d499c80SGordon Ross 	default:
9628d499c80SGordon Ross 		ASSERT(0);
9638d499c80SGordon Ross 		return (-1);
9648d499c80SGordon Ross 	}
9658d499c80SGordon Ross 
9668d499c80SGordon Ross 	/* Pad out to TOKEN_TOTAL_SIZE bytes. */
9678d499c80SGordon Ross 	if (mbc->chain_offset < end_pos) {
9688d499c80SGordon Ross 		padlen = end_pos - mbc->chain_offset;
9698d499c80SGordon Ross 		(void) smb_mbc_encodef(mbc, "#.", padlen);
9708d499c80SGordon Ross 	}
9718d499c80SGordon Ross 	ASSERT(mbc->chain_offset == end_pos);
9728d499c80SGordon Ross 
9738d499c80SGordon Ross 	return (rc);
9748d499c80SGordon Ross }
9758d499c80SGordon Ross 
9768d499c80SGordon Ross static int
smb_odx_put_token_native1(mbuf_chain_t * mbc,struct tok_native1 * tn1)9778d499c80SGordon Ross smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
9788d499c80SGordon Ross {
9798d499c80SGordon Ross 	int rc;
9808d499c80SGordon Ross 
9818d499c80SGordon Ross 	rc = smb_mbc_encodef(
982ba55de6bSGordon Ross 	    mbc, "qqqql",
9838d499c80SGordon Ross 	    tn1->tn1_fid.persistent,
9848d499c80SGordon Ross 	    tn1->tn1_fid.temporal,
9858d499c80SGordon Ross 	    tn1->tn1_off,
986ba55de6bSGordon Ross 	    tn1->tn1_eof,
987ba55de6bSGordon Ross 	    tn1->tn1_tid);
9888d499c80SGordon Ross 
9898d499c80SGordon Ross 	return (rc);
9908d499c80SGordon Ross }
991