/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
 * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
298dab5b0mav
308dab5b0mav#include <sys/cdefs.h>
318dab5b0mav__FBSDID("$FreeBSD$");
328dab5b0mav
338dab5b0mav#include <sys/param.h>
348dab5b0mav#include <sys/bio.h>
358dab5b0mav#include <sys/endian.h>
368dab5b0mav#include <sys/kernel.h>
378dab5b0mav#include <sys/kobj.h>
388dab5b0mav#include <sys/limits.h>
398dab5b0mav#include <sys/lock.h>
408dab5b0mav#include <sys/malloc.h>
418dab5b0mav#include <sys/mutex.h>
428dab5b0mav#include <sys/systm.h>
438dab5b0mav#include <sys/taskqueue.h>
442e02693sbruno#include <sys/disk.h>
458dab5b0mav#include <geom/geom.h>
4610d53fccem#include <geom/geom_dbg.h>
478dab5b0mav#include "geom/raid/g_raid.h"
488dab5b0mav#include "g_raid_md_if.h"
498dab5b0mav
508dab5b0mavstatic MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
518dab5b0mav
/*
 * Intel metadata: one map of a volume.
 *
 * The structure is packed and variable-length: disk_idx[] is declared with
 * a single element but holds total_disks entries, and intel_get_map() finds
 * the following map at &disk_idx[total_disks].
 *
 * offset, disk_sectors and stripe_count carry the low 32 bits of 64-bit
 * quantities; the matching *_hi fields carry the upper 32 bits (see the
 * intel_get_map_*() and intel_set_map_*() helpers).
 */
struct intel_raid_map {
	uint32_t	offset;
	uint32_t	disk_sectors;
	uint32_t	stripe_count;
	uint16_t	strip_sectors;

	/* One of the INTEL_S_* values. */
	uint8_t		status;
#define INTEL_S_READY		0x00
#define INTEL_S_UNINITIALIZED	0x01
#define INTEL_S_DEGRADED	0x02
#define INTEL_S_FAILURE		0x03

	/* One of the INTEL_T_* values. */
	uint8_t		type;
#define INTEL_T_RAID0		0x00
#define INTEL_T_RAID1		0x01
#define INTEL_T_RAID5		0x05

	uint8_t		total_disks;
	uint8_t		total_domains;
	uint8_t		failed_disk_num;
	uint8_t		ddf;
	uint32_t	offset_hi;
	uint32_t	disk_sectors_hi;
	uint32_t	stripe_count_hi;
	uint32_t	filler_2[4];

	/* total_disks entries. */
	uint32_t	disk_idx[1];
#define INTEL_DI_IDX	0x00ffffff
#define INTEL_DI_RBLD	0x01000000
} __packed;
808dab5b0mav
818dab5b0mavstruct intel_raid_vol {
828dab5b0mav	uint8_t		name[16];
838dab5b0mav	u_int64_t	total_sectors __packed;
848dab5b0mav	uint32_t	state;
858dab5b0mav#define INTEL_ST_BOOTABLE		0x00000001
868dab5b0mav#define INTEL_ST_BOOT_DEVICE		0x00000002
878dab5b0mav#define INTEL_ST_READ_COALESCING	0x00000004
888dab5b0mav#define INTEL_ST_WRITE_COALESCING	0x00000008
898dab5b0mav#define INTEL_ST_LAST_SHUTDOWN_DIRTY	0x00000010
908dab5b0mav#define INTEL_ST_HIDDEN_AT_BOOT		0x00000020
918dab5b0mav#define INTEL_ST_CURRENTLY_HIDDEN	0x00000040
928dab5b0mav#define INTEL_ST_VERIFY_AND_FIX		0x00000080
938dab5b0mav#define INTEL_ST_MAP_STATE_UNINIT	0x00000100
948dab5b0mav#define INTEL_ST_NO_AUTO_RECOVERY	0x00000200
958dab5b0mav#define INTEL_ST_CLONE_N_GO		0x00000400
968dab5b0mav#define INTEL_ST_CLONE_MAN_SYNC		0x00000800
978dab5b0mav#define INTEL_ST_CNG_MASTER_DISK_NUM	0x00001000
988dab5b0mav	uint32_t	reserved;
998dab5b0mav	uint8_t		migr_priority;
1008dab5b0mav	uint8_t		num_sub_vols;
1018dab5b0mav	uint8_t		tid;
1028dab5b0mav	uint8_t		cng_master_disk;
1038dab5b0mav	uint16_t	cache_policy;
1048dab5b0mav	uint8_t		cng_state;
1057ed3ee1mav#define INTEL_CNGST_UPDATED		0
1067ed3ee1mav#define INTEL_CNGST_NEEDS_UPDATE	1
1077ed3ee1mav#define INTEL_CNGST_MASTER_MISSING	2
1088dab5b0mav	uint8_t		cng_sub_state;
1098dab5b0mav	uint32_t	filler_0[10];
1108dab5b0mav
1118dab5b0mav	uint32_t	curr_migr_unit;
1128dab5b0mav	uint32_t	checkpoint_id;
1138dab5b0mav	uint8_t		migr_state;
1148dab5b0mav	uint8_t		migr_type;
1158dab5b0mav#define INTEL_MT_INIT		0
1168dab5b0mav#define INTEL_MT_REBUILD	1
1178dab5b0mav#define INTEL_MT_VERIFY		2
1188dab5b0mav#define INTEL_MT_GEN_MIGR	3
1198dab5b0mav#define INTEL_MT_STATE_CHANGE	4
1208dab5b0mav#define INTEL_MT_REPAIR		5
1218dab5b0mav	uint8_t		dirty;
1228dab5b0mav	uint8_t		fs_state;
1238dab5b0mav	uint16_t	verify_errors;
1248dab5b0mav	uint16_t	bad_blocks;
1257b24e93jimharris	uint32_t	curr_migr_unit_hi;
1267b24e93jimharris	uint32_t	filler_1[3];
1278dab5b0mav	struct intel_raid_map map[1];	/* 2 entries if migr_state != 0. */
1288dab5b0mav} __packed;
1298dab5b0mav
/*
 * Intel metadata: one disk entry (packed).
 *
 * sectors carries the low 32 bits and sectors_hi the upper 32 bits of the
 * sector count (see intel_get_disk_sectors() / intel_set_disk_sectors()).
 */
struct intel_raid_disk {
#define INTEL_SERIAL_LEN	16
	uint8_t		serial[INTEL_SERIAL_LEN];
	uint32_t	sectors;
	uint32_t	id;

	/* Bitwise OR of INTEL_F_* flags. */
	uint32_t	flags;
#define INTEL_F_SPARE		0x01
#define INTEL_F_ASSIGNED	0x02
#define INTEL_F_FAILED		0x04
#define INTEL_F_ONLINE		0x08
#define INTEL_F_DISABLED	0x80
	uint32_t	owner_cfg_num;
	uint32_t	sectors_hi;
	uint32_t	filler[3];
} __packed;
1458dab5b0mav
1468dab5b0mavstruct intel_raid_conf {
1478dab5b0mav	uint8_t		intel_id[24];
1488dab5b0mav#define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
1498dab5b0mav
1508dab5b0mav	uint8_t		version[6];
1518dab5b0mav#define INTEL_VERSION_1000	"1.0.00"	/* RAID0 */
1528dab5b0mav#define INTEL_VERSION_1100	"1.1.00"	/* RAID1 */
1538dab5b0mav#define INTEL_VERSION_1200	"1.2.00"	/* Many volumes */
1548dab5b0mav#define INTEL_VERSION_1201	"1.2.01"	/* 3 or 4 disks */
1558dab5b0mav#define INTEL_VERSION_1202	"1.2.02"	/* RAID5 */
1568dab5b0mav#define INTEL_VERSION_1204	"1.2.04"	/* 5 or 6 disks */
1578dab5b0mav#define INTEL_VERSION_1206	"1.2.06"	/* CNG */
1588dab5b0mav#define INTEL_VERSION_1300	"1.3.00"	/* Attributes */
1598dab5b0mav
1608dab5b0mav	uint8_t		dummy_0[2];
1618dab5b0mav	uint32_t	checksum;
1628dab5b0mav	uint32_t	config_size;
1638dab5b0mav	uint32_t	config_id;
1648dab5b0mav	uint32_t	generation;
1658dab5b0mav	uint32_t	error_log_size;
1668dab5b0mav	uint32_t	attributes;
1678dab5b0mav#define INTEL_ATTR_RAID0	0x00000001
1688dab5b0mav#define INTEL_ATTR_RAID1	0x00000002
1698dab5b0mav#define INTEL_ATTR_RAID10	0x00000004
1708dab5b0mav#define INTEL_ATTR_RAID1E	0x00000008
1718dab5b0mav#define INTEL_ATTR_RAID5	0x00000010
1728dab5b0mav#define INTEL_ATTR_RAIDCNG	0x00000020
17344f703amav#define INTEL_ATTR_EXT_STRIP	0x00000040
17444f703amav#define INTEL_ATTR_NVM_CACHE	0x02000000
17544f703amav#define INTEL_ATTR_2TB_DISK	0x04000000
17644f703amav#define INTEL_ATTR_BBM		0x08000000
17744f703amav#define INTEL_ATTR_NVM_CACHE2	0x10000000
1788dab5b0mav#define INTEL_ATTR_2TB		0x20000000
1798dab5b0mav#define INTEL_ATTR_PM		0x40000000
1808dab5b0mav#define INTEL_ATTR_CHECKSUM	0x80000000
1818dab5b0mav
1828dab5b0mav	uint8_t		total_disks;
1838dab5b0mav	uint8_t		total_volumes;
184960e9d0mav	uint8_t		error_log_pos;
185960e9d0mav	uint8_t		dummy_2[1];
186960e9d0mav	uint32_t	cache_size;
187163aff2mav	uint32_t	orig_config_id;
188960e9d0mav	uint32_t	pwr_cycle_count;
189960e9d0mav	uint32_t	bbm_log_size;
190960e9d0mav	uint32_t	filler_0[35];
1918dab5b0mav	struct intel_raid_disk	disk[1];	/* total_disks entries. */
1928dab5b0mav	/* Here goes total_volumes of struct intel_raid_vol. */
1938dab5b0mav} __packed;
1948dab5b0mav
/*
 * Mask of the INTEL_ATTR_* bits included in the "supported" set.  Note that
 * INTEL_ATTR_NVM_CACHE, INTEL_ATTR_BBM and INTEL_ATTR_NVM_CACHE2 are not
 * part of it.
 */
#define INTEL_ATTR_SUPPORTED						\
    (INTEL_ATTR_RAID0 | INTEL_ATTR_RAID1 | INTEL_ATTR_RAID10 |		\
     INTEL_ATTR_RAID1E | INTEL_ATTR_RAID5 | INTEL_ATTR_RAIDCNG |	\
     INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK | INTEL_ATTR_2TB |	\
     INTEL_ATTR_PM | INTEL_ATTR_CHECKSUM)
19944f703amav
/*
 * Size in bytes of a metadata image for ndisks disks holding two volumes
 * with two maps each:
 *  - the configuration header, which already embeds one disk entry;
 *  - the remaining (ndisks - 1) disk entries;
 *  - two volume records, each already embedding one map;
 *  - two additional maps (the second map of each volume);
 *  - for each of the four maps, the (ndisks - 1) disk_idx words beyond the
 *    one embedded in struct intel_raid_map.
 *
 * The argument is parenthesized so that expressions built from operators
 * with lower precedence than '-' (e.g. "a ? b : c" or "n << 1") expand
 * correctly.  It is evaluated twice, so it must be free of side effects.
 */
#define INTEL_MAX_MD_SIZE(ndisks)				\
    (sizeof(struct intel_raid_conf) +				\
     sizeof(struct intel_raid_disk) * ((ndisks) - 1) +		\
     sizeof(struct intel_raid_vol) * 2 +			\
     sizeof(struct intel_raid_map) * 2 +			\
     sizeof(uint32_t) * ((ndisks) - 1) * 4)
2068dab5b0mav
2078dab5b0mavstruct g_raid_md_intel_perdisk {
2088dab5b0mav	struct intel_raid_conf	*pd_meta;
2098dab5b0mav	int			 pd_disk_pos;
2108dab5b0mav	struct intel_raid_disk	 pd_disk_meta;
2118dab5b0mav};
2128dab5b0mav
/* Per-volume private data kept by the Intel metadata module. */
struct g_raid_md_intel_pervolume {
	int	pv_volume_pos;
	int	pv_cng;
	int	pv_cng_man_sync;
	int	pv_cng_master_disk;
};
2192a61b08mav
2208dab5b0mavstruct g_raid_md_intel_object {
2218dab5b0mav	struct g_raid_md_object	 mdio_base;
2228dab5b0mav	uint32_t		 mdio_config_id;
2231d08afcmav	uint32_t		 mdio_orig_config_id;
2248dab5b0mav	uint32_t		 mdio_generation;
2258dab5b0mav	struct intel_raid_conf	*mdio_meta;
2268dab5b0mav	struct callout		 mdio_start_co;	/* STARTING state timer. */
2278dab5b0mav	int			 mdio_disks_present;
2288dab5b0mav	int			 mdio_started;
2298dab5b0mav	int			 mdio_incomplete;
2308dab5b0mav	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
2318dab5b0mav};
2328dab5b0mav
2338dab5b0mavstatic g_raid_md_create_t g_raid_md_create_intel;
2348dab5b0mavstatic g_raid_md_taste_t g_raid_md_taste_intel;
2358dab5b0mavstatic g_raid_md_event_t g_raid_md_event_intel;
2368dab5b0mavstatic g_raid_md_ctl_t g_raid_md_ctl_intel;
2378dab5b0mavstatic g_raid_md_write_t g_raid_md_write_intel;
2388dab5b0mavstatic g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
2398dab5b0mavstatic g_raid_md_free_disk_t g_raid_md_free_disk_intel;
2402a61b08mavstatic g_raid_md_free_volume_t g_raid_md_free_volume_intel;
2418dab5b0mavstatic g_raid_md_free_t g_raid_md_free_intel;
2428dab5b0mav
2438dab5b0mavstatic kobj_method_t g_raid_md_intel_methods[] = {
2448dab5b0mav	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_intel),
2458dab5b0mav	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_intel),
2468dab5b0mav	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_intel),
2478dab5b0mav	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_intel),
2488dab5b0mav	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_intel),
2498dab5b0mav	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_intel),
2508dab5b0mav	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_intel),
2512a61b08mav	KOBJMETHOD(g_raid_md_free_volume,	g_raid_md_free_volume_intel),
2528dab5b0mav	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_intel),
2538dab5b0mav	{ 0, 0 }
2548dab5b0mav};
2558dab5b0mav
2568dab5b0mavstatic struct g_raid_md_class g_raid_md_intel_class = {
2578dab5b0mav	"Intel",
2588dab5b0mav	g_raid_md_intel_methods,
2598dab5b0mav	sizeof(struct g_raid_md_intel_object),
260db9e01amav	.mdc_enable = 1,
2618dab5b0mav	.mdc_priority = 100
2628dab5b0mav};
2638dab5b0mav
2648dab5b0mavstatic struct intel_raid_map *
2658dab5b0mavintel_get_map(struct intel_raid_vol *mvol, int i)
2668dab5b0mav{
2678dab5b0mav	struct intel_raid_map *mmap;
2688dab5b0mav
2698dab5b0mav	if (i > (mvol->migr_state ? 1 : 0))
2708dab5b0mav		return (NULL);
2718dab5b0mav	mmap = &mvol->map[0];
2728dab5b0mav	for (; i > 0; i--) {
2738dab5b0mav		mmap = (struct intel_raid_map *)
2748dab5b0mav		    &mmap->disk_idx[mmap->total_disks];
2758dab5b0mav	}
2768dab5b0mav	return ((struct intel_raid_map *)mmap);
2778dab5b0mav}
2788dab5b0mav
2798dab5b0mavstatic struct intel_raid_vol *
2808dab5b0mavintel_get_volume(struct intel_raid_conf *meta, int i)
2818dab5b0mav{
2828dab5b0mav	struct intel_raid_vol *mvol;
2838dab5b0mav	struct intel_raid_map *mmap;
2848dab5b0mav
2858dab5b0mav	if (i > 1)
2868dab5b0mav		return (NULL);
2878dab5b0mav	mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
2888dab5b0mav	for (; i > 0; i--) {
2898dab5b0mav		mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
2908dab5b0mav		mvol = (struct intel_raid_vol *)
2918dab5b0mav		    &mmap->disk_idx[mmap->total_disks];
2928dab5b0mav	}
2938dab5b0mav	return (mvol);
2948dab5b0mav}
2958dab5b0mav
2967b24e93jimharrisstatic off_t
2977b24e93jimharrisintel_get_map_offset(struct intel_raid_map *mmap)
2987b24e93jimharris{
2997b24e93jimharris	off_t offset = (off_t)mmap->offset_hi << 32;
3007b24e93jimharris
3017b24e93jimharris	offset += mmap->offset;
3027b24e93jimharris	return (offset);
3037b24e93jimharris}
3047b24e93jimharris
3057b24e93jimharrisstatic void
3067b24e93jimharrisintel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
3077b24e93jimharris{
3087b24e93jimharris
3097b24e93jimharris	mmap->offset = offset & 0xffffffff;
3107b24e93jimharris	mmap->offset_hi = offset >> 32;
3117b24e93jimharris}
3127b24e93jimharris
3137b24e93jimharrisstatic off_t
3147b24e93jimharrisintel_get_map_disk_sectors(struct intel_raid_map *mmap)
3157b24e93jimharris{
3167b24e93jimharris	off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
3177b24e93jimharris
3187b24e93jimharris	disk_sectors += mmap->disk_sectors;
3197b24e93jimharris	return (disk_sectors);
3207b24e93jimharris}
3217b24e93jimharris
3227b24e93jimharrisstatic void
3237b24e93jimharrisintel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
3247b24e93jimharris{
3257b24e93jimharris
3267b24e93jimharris	mmap->disk_sectors = disk_sectors & 0xffffffff;
3277b24e93jimharris	mmap->disk_sectors_hi = disk_sectors >> 32;
3287b24e93jimharris}
3297b24e93jimharris
3307b24e93jimharrisstatic void
3317b24e93jimharrisintel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
3327b24e93jimharris{
3337b24e93jimharris
3347b24e93jimharris	mmap->stripe_count = stripe_count & 0xffffffff;
3357b24e93jimharris	mmap->stripe_count_hi = stripe_count >> 32;
3367b24e93jimharris}
3377b24e93jimharris
3387b24e93jimharrisstatic off_t
3397b24e93jimharrisintel_get_disk_sectors(struct intel_raid_disk *disk)
3407b24e93jimharris{
3417b24e93jimharris	off_t sectors = (off_t)disk->sectors_hi << 32;
3427b24e93jimharris
3437b24e93jimharris	sectors += disk->sectors;
3447b24e93jimharris	return (sectors);
3457b24e93jimharris}
3467b24e93jimharris
3477b24e93jimharrisstatic void
3487b24e93jimharrisintel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
3497b24e93jimharris{
3507b24e93jimharris
3517b24e93jimharris	disk->sectors = sectors & 0xffffffff;
3527b24e93jimharris	disk->sectors_hi = sectors >> 32;
3537b24e93jimharris}
3547b24e93jimharris
3557b24e93jimharrisstatic off_t
3567b24e93jimharrisintel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
3577b24e93jimharris{
3587b24e93jimharris	off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
3597b24e93jimharris
3607b24e93jimharris	curr_migr_unit += vol->curr_migr_unit;
3617b24e93jimharris	return (curr_migr_unit);
3627b24e93jimharris}
3637b24e93jimharris
3647b24e93jimharrisstatic void
3657b24e93jimharrisintel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
3667b24e93jimharris{
3677b24e93jimharris
3687b24e93jimharris	vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
3697b24e93jimharris	vol->curr_migr_unit_hi = curr_migr_unit >> 32;
3707b24e93jimharris}
3717b24e93jimharris
372173190cmavstatic char *
373173190cmavintel_status2str(int status)
374173190cmav{
375173190cmav
376173190cmav	switch (status) {
377173190cmav	case INTEL_S_READY:
378173190cmav		return ("READY");
379173190cmav	case INTEL_S_UNINITIALIZED:
380173190cmav		return ("UNINITIALIZED");
381173190cmav	case INTEL_S_DEGRADED:
382173190cmav		return ("DEGRADED");
383173190cmav	case INTEL_S_FAILURE:
384173190cmav		return ("FAILURE");
385173190cmav	default:
386173190cmav		return ("UNKNOWN");
387173190cmav	}
388173190cmav}
389173190cmav
390173190cmavstatic char *
391173190cmavintel_type2str(int type)
392173190cmav{
393173190cmav
394