1eb63303Tom Caputi/*
2eb63303Tom Caputi * CDDL HEADER START
3eb63303Tom Caputi *
4eb63303Tom Caputi * This file and its contents are supplied under the terms of the
5eb63303Tom Caputi * Common Development and Distribution License ("CDDL"), version 1.0.
6eb63303Tom Caputi * You may only use this file in accordance with the terms of version
7eb63303Tom Caputi * 1.0 of the CDDL.
8eb63303Tom Caputi *
9eb63303Tom Caputi * A full copy of the text of the CDDL should have accompanied this
10eb63303Tom Caputi * source.  A copy of the CDDL is also available via the Internet at
11eb63303Tom Caputi * http://www.illumos.org/license/CDDL.
12eb63303Tom Caputi *
13eb63303Tom Caputi * CDDL HEADER END
14eb63303Tom Caputi */
15eb63303Tom Caputi
16eb63303Tom Caputi/*
17eb63303Tom Caputi * Copyright (c) 2017, Datto, Inc. All rights reserved.
18eb63303Tom Caputi */
19eb63303Tom Caputi
20eb63303Tom Caputi#include <sys/zio_crypt.h>
21eb63303Tom Caputi#include <sys/dmu.h>
22eb63303Tom Caputi#include <sys/dmu_objset.h>
23eb63303Tom Caputi#include <sys/dnode.h>
24eb63303Tom Caputi#include <sys/fs/zfs.h>
25eb63303Tom Caputi#include <sys/zio.h>
26eb63303Tom Caputi#include <sys/zil.h>
27eb63303Tom Caputi#include <sys/sha2.h>
28eb63303Tom Caputi#include <sys/hkdf.h>
29eb63303Tom Caputi
30eb63303Tom Caputi/*
31eb63303Tom Caputi * This file is responsible for handling all of the details of generating
32eb63303Tom Caputi * encryption parameters and performing encryption and authentication.
33eb63303Tom Caputi *
34eb63303Tom Caputi * BLOCK ENCRYPTION PARAMETERS:
35eb63303Tom Caputi * Encryption /Authentication Algorithm Suite (crypt):
36eb63303Tom Caputi * The encryption algorithm, mode, and key length we are going to use. We
37eb63303Tom Caputi * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit
38eb63303Tom Caputi * keys. All authentication is currently done with SHA512-HMAC.
39eb63303Tom Caputi *
40eb63303Tom Caputi * Plaintext:
41eb63303Tom Caputi * The unencrypted data that we want to encrypt.
42eb63303Tom Caputi *
43eb63303Tom Caputi * Initialization Vector (IV):
44eb63303Tom Caputi * An initialization vector for the encryption algorithms. This is used to
45eb63303Tom Caputi * "tweak" the encryption algorithms so that two blocks of the same data are
46eb63303Tom Caputi * encrypted into different ciphertext outputs, thus obfuscating block patterns.
47eb63303Tom Caputi * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is
48eb63303Tom Caputi * never reused with the same encryption key. This value is stored unencrypted
49eb63303Tom Caputi * and must simply be provided to the decryption function. We use a 96 bit IV
50eb63303Tom Caputi * (as recommended by NIST) for all block encryption. For non-dedup blocks we
51eb63303Tom Caputi * derive the IV randomly. The first 64 bits of the IV are stored in the second
52eb63303Tom Caputi * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of
53eb63303Tom Caputi * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits
54eb63303Tom Caputi * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count
55eb63303Tom Caputi * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of
56eb63303Tom Caputi * level 0 blocks is the number of allocated dnodes in that block. The on-disk
57eb63303Tom Caputi * format supports at most 2^15 slots per L0 dnode block, because the maximum
58eb63303Tom Caputi * block size is 16MB (2^24). In either case, for level 0 blocks this number
59eb63303Tom Caputi * will still be smaller than UINT32_MAX so it is safe to store the IV in the
60eb63303Tom Caputi * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count
61eb63303Tom Caputi * for the dnode code.
62eb63303Tom Caputi *
63eb63303Tom Caputi * Master key:
64eb63303Tom Caputi * This is the most important secret data of an encrypted dataset. It is used
 * along with the salt to generate the actual encryption keys via HKDF. We
66eb63303Tom Caputi * do not use the master key to directly encrypt any data because there are
67eb63303Tom Caputi * theoretical limits on how much data can actually be safely encrypted with
68eb63303Tom Caputi * any encryption mode. The master key is stored encrypted on disk with the
69eb63303Tom Caputi * user's wrapping key. Its length is determined by the encryption algorithm.
70eb63303Tom Caputi * For details on how this is stored see the block comment in dsl_crypt.c
71eb63303Tom Caputi *
72eb63303Tom Caputi * Salt:
73eb63303Tom Caputi * Used as an input to the HKDF function, along with the master key. We use a
74eb63303Tom Caputi * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
75eb63303Tom Caputi * can be used for encrypting many blocks, so we cache the current salt and the
76eb63303Tom Caputi * associated derived key in zio_crypt_t so we do not need to derive it again
77eb63303Tom Caputi * needlessly.
78eb63303Tom Caputi *
79eb63303Tom Caputi * Encryption Key:
80eb63303Tom Caputi * A secret binary key, generated from an HKDF function used to encrypt and
81eb63303Tom Caputi * decrypt data.
82eb63303Tom Caputi *
 * Message Authentication Code (MAC)
84eb63303Tom Caputi * The MAC is an output of authenticated encryption modes such as AES-GCM and
85eb63303Tom Caputi * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
86eb63303Tom Caputi * data on disk and return garbage to the application. Effectively, it is a
87eb63303Tom Caputi * checksum that can not be reproduced by an attacker. We store the MAC in the
88eb63303Tom Caputi * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
89eb63303Tom Caputi * regular checksum of the ciphertext which can be used for scrubbing.
90eb63303Tom Caputi *
91eb63303Tom Caputi * OBJECT AUTHENTICATION:
92eb63303Tom Caputi * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
93eb63303Tom Caputi * they contain some info that always needs to be readable. To prevent this
94eb63303Tom Caputi * data from being altered, we authenticate this data using SHA512-HMAC. This
95eb63303Tom Caputi * will produce a MAC (similar to the one produced via encryption) which can
96eb63303Tom Caputi * be used to verify the object was not modified. HMACs do not require key
97eb63303Tom Caputi * rotation or IVs, so we can keep up to the full 3 copies of authenticated
98eb63303Tom Caputi * data.
99eb63303Tom Caputi *
100eb63303Tom Caputi * ZIL ENCRYPTION:
101eb63303Tom Caputi * ZIL blocks have their bp written to disk ahead of the associated data, so we
102eb63303Tom Caputi * cannot store the MAC there as we normally do. For these blocks the MAC is
103eb63303Tom Caputi * stored in the embedded checksum within the zil_chain_t header. The salt and
104eb63303Tom Caputi * IV are generated for the block on bp allocation instead of at encryption
105eb63303Tom Caputi * time. In addition, ZIL blocks have some pieces that must be left in plaintext
106eb63303Tom Caputi * for claiming even though all of the sensitive user data still needs to be
107eb63303Tom Caputi * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
108eb63303Tom Caputi * pieces of the block need to be encrypted. All data that is not encrypted is
109eb63303Tom Caputi * authenticated using the AAD mechanisms that the supported encryption modes
110eb63303Tom Caputi * provide for. In order to preserve the semantics of the ZIL for encrypted
111eb63303Tom Caputi * datasets, the ZIL is not protected at the objset level as described below.
112eb63303Tom Caputi *
113eb63303Tom Caputi * DNODE ENCRYPTION:
114eb63303Tom Caputi * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
115eb63303Tom Caputi * in plaintext for scrubbing and claiming, but the bonus buffers might contain
116eb63303Tom Caputi * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
117eb63303Tom Caputi * which pieces of the block need to be encrypted. For more details about
118eb63303Tom Caputi * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
119eb63303Tom Caputi *
120eb63303Tom Caputi * OBJECT SET AUTHENTICATION:
121eb63303Tom Caputi * Up to this point, everything we have encrypted and authenticated has been
122eb63303Tom Caputi * at level 0 (or -2 for the ZIL). If we did not do any further work the
 * on-disk format would be susceptible to attacks that deleted or rearranged
124eb63303Tom Caputi * the order of level 0 blocks. Ideally, the cleanest solution would be to
125eb63303Tom Caputi * maintain a tree of authentication MACs going up the bp tree. However, this
126eb63303Tom Caputi * presents a problem for raw sends. Send files do not send information about
127eb63303Tom Caputi * indirect blocks so there would be no convenient way to transfer the MACs and
128eb63303Tom Caputi * they cannot be recalculated on the receive side without the master key which
129eb63303Tom Caputi * would defeat one of the purposes of raw sends in the first place. Instead,
130eb63303Tom Caputi * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
131eb63303Tom Caputi * from the level below. We also include some portable fields from blk_prop such
132eb63303Tom Caputi * as the lsize and compression algorithm to prevent the data from being
 * misinterpreted.
134eb63303Tom Caputi *
 * At the objset level, we maintain 2 separate 256 bit MACs in the
 * objset_phys_t. The first one is "portable" and is the logical root of the
 * MAC tree maintained in the metadnode's bps. The second is "local" and is
138eb63303Tom Caputi * used as the root MAC for the user accounting objects, which are also not
139eb63303Tom Caputi * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
140eb63303Tom Caputi * of the send file. The useraccounting code ensures that the useraccounting
141eb63303Tom Caputi * info is not present upon a receive, so the local MAC can simply be cleared
142eb63303Tom Caputi * out at that time. For more info about objset_phys_t authentication, see
143eb63303Tom Caputi * zio_crypt_do_objset_hmacs().
144eb63303Tom Caputi *
145eb63303Tom Caputi * CONSIDERATIONS FOR DEDUP:
146eb63303Tom Caputi * In order for dedup to work, blocks that we want to dedup with one another
147eb63303Tom Caputi * need to use the same IV and encryption key, so that they will have the same
148eb63303Tom Caputi * ciphertext. Normally, one should never reuse an IV with the same encryption
149eb63303Tom Caputi * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
150eb63303Tom Caputi * blocks. In this case, however, since we are using the same plaindata as
 * well, all that we end up with is a duplicate of the original ciphertext we
152eb63303Tom Caputi * already had. As a result, an attacker with read access to the raw disk will
153eb63303Tom Caputi * be able to tell which blocks are the same but this information is given away
154eb63303Tom Caputi * by dedup anyway. In order to get the same IVs and encryption keys for
155eb63303Tom Caputi * equivalent blocks of data we use an HMAC of the plaindata. We use an HMAC
156eb63303Tom Caputi * here so that a reproducible checksum of the plaindata is never available to
157eb63303Tom Caputi * the attacker. The HMAC key is kept alongside the master key, encrypted on
158eb63303Tom Caputi * disk. The first 64 bits of the HMAC are used in place of the random salt, and
159eb63303Tom Caputi * the next 96 bits are used as the IV. As a result of this mechanism, dedup
160eb63303Tom Caputi * will only work within a clone family since encrypted dedup requires use of
161eb63303Tom Caputi * the same master and HMAC keys.
162eb63303Tom Caputi */
163eb63303Tom Caputi
164eb63303Tom Caputi/*
165eb63303Tom Caputi * After encrypting many blocks with the same key we may start to run up
166eb63303Tom Caputi * against the theoretical limits of how much data can securely be encrypted
167eb63303Tom Caputi * with a single key using the supported encryption modes. The most obvious
168eb63303Tom Caputi * limitation is that our risk of generating 2 equivalent 96 bit IVs increases
169eb63303Tom Caputi * the more IVs we generate (which both GCM and CCM modes strictly forbid).
170eb63303Tom Caputi * This risk actually grows surprisingly quickly over time according to the
171eb63303Tom Caputi * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have
172eb63303Tom Caputi * generated n IVs with a cryptographically secure RNG, the approximate
173eb63303Tom Caputi * probability p(n) of a collision is given as:
174eb63303Tom Caputi *
 * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96)))
176eb63303Tom Caputi *
177eb63303Tom Caputi * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html]
178eb63303Tom Caputi *
179eb63303Tom Caputi * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion
180eb63303Tom Caputi * we must not write more than 398,065,730 blocks with the same encryption key.
181eb63303Tom Caputi * Therefore, we rotate our keys after 400,000,000 blocks have been written by
182eb63303Tom Caputi * generating a new random 64 bit salt for our HKDF encryption key generation
183eb63303Tom Caputi * function.
184eb63303Tom Caputi */
/* Compiled-in ceiling on how many times a single salt may be used. */
#define	ZFS_KEY_MAX_SALT_USES_DEFAULT	400000000

/*
 * Runtime-adjustable salt use limit. It starts out equal to the compiled-in
 * ceiling.
 */
unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;

/*
 * Effective limit: the smaller of the runtime value and the compiled-in
 * ceiling, so zfs_key_max_salt_uses can lower the limit but never raise it.
 */
#define	ZFS_CURRENT_MAX_SALT_USES	\
	(MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))

/*
 * Error-injection knob for exercising the decryption error handling paths:
 * when set to a nonzero value, zio_do_crypt_uio() fails on 1 out of every
 * zio_decrypt_fail_fraction calls. It is zero by default.
 */
uint64_t zio_decrypt_fail_fraction = 0;
195eb63303Tom Caputi
/*
 * Fixed-layout buffer holding the pieces of a block pointer that take part
 * in authentication: the portable bits of blk_prop and the MAC taken from
 * blk_cksum, followed by a reserved word.
 * NOTE(review): the code that fills in and hashes this structure is not
 * visible here; confirm that bab_pad is expected to be zeroed by callers.
 */
typedef struct blkptr_auth_buf {
	uint64_t bab_prop;			/* blk_prop - portable mask */
	uint8_t bab_mac[ZIO_DATA_MAC_LEN];	/* MAC from blk_cksum */
	uint64_t bab_pad;			/* reserved for future use */
} blkptr_auth_buf_t;
201eb63303Tom Caputi
/*
 * Table of encryption suites, indexed by the crypt value (for example,
 * zio_crypt_key_init() reads zio_crypt_table[crypt].ci_keylen). The columns
 * of each entry are, in order: the crypto mechanism name, the cipher mode
 * type, the key length in bytes, and the suite's name string.
 *
 * The first three entries ("inherit", "on" and "off") have no mechanism
 * name, type ZC_TYPE_NONE and a key length of 0. The remaining entries are
 * AES in CCM or GCM mode with 16, 24 or 32 byte (128, 192 or 256 bit) keys.
 */
zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
	{"",			ZC_TYPE_NONE,	0,	"inherit"},
	{"",			ZC_TYPE_NONE,	0,	"on"},
	{"",			ZC_TYPE_NONE,	0,	"off"},
	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	16,	"aes-128-ccm"},
	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	24,	"aes-192-ccm"},
	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	32,	"aes-256-ccm"},
	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	16,	"aes-128-gcm"},
	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	24,	"aes-192-gcm"},
	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	32,	"aes-256-gcm"}
};
213eb63303Tom Caputi
214eb63303Tom Caputivoid
215eb63303Tom Caputizio_crypt_key_destroy(zio_crypt_key_t *key)
216eb63303Tom Caputi{
217eb63303Tom Caputi	rw_destroy(&key->zk_salt_lock);
218eb63303Tom Caputi
219eb63303Tom Caputi	/* free crypto templates */
220eb63303Tom Caputi	crypto_destroy_ctx_template(key->zk_current_tmpl);
221eb63303Tom Caputi	crypto_destroy_ctx_template(key->zk_hmac_tmpl);
222eb63303Tom Caputi
223eb63303Tom Caputi	/* zero out sensitive data */
224eb63303Tom Caputi	bzero(key, sizeof (zio_crypt_key_t));
225eb63303Tom Caputi}
226eb63303Tom Caputi
227eb63303Tom Caputiint
228eb63303Tom Caputizio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
229eb63303Tom Caputi{
230eb63303Tom Caputi	int ret;
231eb63303Tom Caputi	crypto_mechanism_t mech;
232eb63303Tom Caputi	uint_t keydata_len;
233eb63303Tom Caputi
234eb63303Tom Caputi	ASSERT(key != NULL);
235eb63303Tom Caputi	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
236eb63303Tom Caputi
237eb63303Tom Caputi	keydata_len = zio_crypt_table[crypt].ci_keylen;
238eb63303Tom Caputi	bzero(key, sizeof (zio_crypt_key_t));
239eb63303Tom Caputi
240eb63303Tom Caputi	/* fill keydata buffers and salt with random data */
241eb63303Tom Caputi	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
242eb63303Tom Caputi	if (ret != 0)
243eb63303Tom Caputi		goto error;
244eb63303Tom Caputi
245eb63303Tom Caputi	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
246eb63303Tom Caputi	if (ret != 0)
247eb63303Tom Caputi		goto error;
248eb63303Tom Caputi
249eb63303Tom Caputi	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
250eb63303Tom Caputi	if (ret != 0)
251eb63303Tom Caputi		goto error;
252eb63303Tom Caputi
253eb63303Tom Caputi	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
254eb63303Tom Caputi	if (ret != 0)
255eb63303Tom Caputi		goto error;
256eb63303Tom Caputi
257eb63303Tom Caputi	/* derive the current key from the master key */
258eb63303Tom Caputi	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
259eb63303Tom Caputi	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
260eb63303Tom Caputi	    keydata_len);
261eb63303Tom Caputi	if (ret != 0)
262