#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright 2019 Joyent, Inc.
#
17*b73ccab0SMike Gerdts. $STF_SUITE/include/libtest.shlib
18*b73ccab0SMike Gerdts. $STF_SUITE/tests/functional/refreserv/refreserv.cfg
19*b73ccab0SMike Gerdts
#
# DESCRIPTION:
#	raidz refreservation=auto picks worst raidz vdev
#
# STRATEGY:
#	1. Create a pool with a single raidz vdev
#	2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k]
#	    - create a volume
#	    - remember its refreservation
#	    - destroy the volume
#	3. Destroy the pool
#	4. Recreate the pool with one more disk in the vdev, then repeat steps
#	   2 and 3.
#
# NOTES:
#	1. This test will use up to 14 disks but can cover the key concepts with
#	   5 disks.
#	2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely.
#
39*b73ccab0SMike Gerdts
verify_runnable "global"

# Every disk named in $DISKS, as an indexed array.  $DISKS is deliberately
# left unquoted so that it is split into one element per word.
typeset -a alldisks
alldisks=($DISKS)

# Size, in MiB, of every volume this test creates.  The larger the volsize,
# the better zvol_volsize_to_reservation() is at guessing the right number -
# though it is horrible with tiny blocks.  At 10M on ashift=12, the estimate
# may be over 26% too high.
volsize=100
48*b73ccab0SMike Gerdts
#
# Exit handler, registered through log_onexit.  Runs the suite's default
# cleanup and then the default setup on the first disk in alldisks.
# NOTE(review): presumably this restores the pool that later tests expect;
# confirm against default_setup_noexit in libtest.shlib.
#
function cleanup {
	default_cleanup_noexit
	default_setup_noexit "${alldisks[0]}"
}
54*b73ccab0SMike Gerdts
log_assert "raidz refreservation=auto picks worst raidz vdev"
log_onexit cleanup

# This test builds $TESTPOOL itself in several layouts, so remove any pool
# of that name that already exists.
if poolexists "$TESTPOOL"; then
	log_must zpool destroy "$TESTPOOL"
fi
59*b73ccab0SMike Gerdts
# Pick the volblocksize exponents to test from the sector size that prtvtoc
# reports for the first disk: 2^9, 2^10 and 2^17 for 512-byte sectors, or
# 2^12, 2^13 and 2^17 for 4096-byte sectors.
#
# Testing tiny block sizes on ashift=12 pools causes so much size inflation
# that small test disks may fill before creating small volumes.  However,
# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for
# testing the problems that arise from 4K and 8K blocks on ashift=12 pools.
bps=$(prtvtoc /dev/rdsk/${alldisks[0]} |
    awk '$NF == "bytes/sector" { print $2; exit 0 }')
if [[ "$bps" == "512" ]]; then
	allshifts=(9 10 17)
elif [[ "$bps" == "4096" ]]; then
	allshifts=(12 13 17)
else
	# Also reached when prtvtoc printed no bytes/sector line at all.
	log_fail "bytes/sector != (512|4096)"
fi
log_note "Testing in ashift=${allshifts[0]} mode"
78*b73ccab0SMike Gerdts
# Refreservations observed on single-vdev pools, stored as
# sizes[raidz type][number of disks][volblocksize].
typeset -A sizes=

# The same volume name is reused for every measurement.
vol=$TESTPOOL/$TESTVOL

#
# Determine the refreservation for a $volsize MiB volume on each raidz type at
# various block sizes.
#
for parity in 1 2 3; do
	raid=raidz$parity
	typeset -A sizes["$raid"]

	# Two widths per raidz type: 2 * parity disks and one more than that.
	# Ensure we hit scenarios with and without skip blocks
	for ndisks in $((parity * 2)) $((parity * 2 + 1)); do
		# First $ndisks disks; comes up short if too few were supplied.
		typeset -a disks=(${alldisks[0..$((ndisks - 1))]})

		if (( ${#disks[@]} < ndisks )); then
			log_note "Too few disks to test $raid-$ndisks"
			continue
		fi

		typeset -A sizes["$raid"]["$ndisks"]

		log_must zpool create "$TESTPOOL" "$raid" "${disks[@]}"

		for bits in "${allshifts[@]}"; do
			vbs=$((1 << bits))
			log_note "Gathering refreservation for $raid-$ndisks" \
			    "volblocksize=$vbs"

			log_must zfs create -V ${volsize}m \
			    -o volblocksize=$vbs "$vol"

			# -p asks for the exact numeric value, which is what
			# the later -eq comparison needs.
			refres=$(zfs get -Hpo value refreservation "$vol")
			log_must test -n "$refres"
			sizes["$raid"]["$ndisks"]["$vbs"]=$refres

			log_must zfs destroy "$vol"
		done

		log_must zpool destroy "$TESTPOOL"
	done
done

# A little extra info is always helpful when diagnosing problems.  To
# pretty-print what you find in the log, do this in ksh:
#   typeset -A sizes=(...)
#   print -v sizes
log_note "sizes=$(print -C sizes)"
127*b73ccab0SMike Gerdts
#
# Helper function for checking that refreservation is calculated properly in
# multi-vdev pools.  "Properly" is defined as assuming that all vdevs are as
# space inefficient as the worst one.
#
# Arguments:
#	$1 - raidz type, used both as the vdev keyword and as a key into sizes
#	$2 - number of disks in the first vdev
#	$3 - number of disks in the second vdev
#
# Globals read: alldisks, allshifts, sizes, volsize, TESTPOOL, TESTVOL
#
# If alldisks cannot supply $2 + $3 disks, a note is logged and the function
# returns without testing.  Otherwise, for every tested volblocksize the
# refreservation of a new volume must equal the larger of the two values
# recorded in sizes for the single-vdev pools of $2 and $3 disks.
#
function check_vdevs {
	typeset raid=$1
	typeset nd1=$2
	typeset nd2=$3
	typeset -a disks1 disks2
	# bits is declared here so the loop below does not clobber the global
	# variable of the same name.
	typeset bits vbs vol refres refres1 refres2 expect worst

	disks1=(${alldisks[0..$((nd1 - 1))]})
	disks2=(${alldisks[$nd1..$((nd1 + nd2 - 1))]})
	# disks2 is sliced after disks1, so a shortage always shows up here.
	if (( ${#disks2[@]} < nd2 )); then
		log_note "Too few disks to test $raid-$nd1 + $raid-$nd2"
		return
	fi

	# NOTE(review): -f is presumably needed because the two vdevs may have
	# different widths, which zpool create treats as a conflicting
	# replication level - confirm against zpool(8).
	log_must zpool create -f "$TESTPOOL" \
	    "$raid" "${disks1[@]}" "$raid" "${disks2[@]}"

	vol=$TESTPOOL/$TESTVOL
	for bits in "${allshifts[@]}"; do
		vbs=$((1 << bits))
		log_note "Verifying $raid-$nd1 $raid-$nd2 volblocksize=$vbs"

		log_must zfs create -V ${volsize}m -o volblocksize=$vbs "$vol"
		refres=$(zfs get -Hpo value refreservation "$vol")
		log_must test -n "$refres"

		refres1=${sizes["$raid"]["$nd1"]["$vbs"]}
		refres2=${sizes["$raid"]["$nd2"]["$vbs"]}

		# The expected value is the larger single-vdev refreservation;
		# remember which width produced it so the note names the right
		# vdev.
		if (( refres1 > refres2 )); then
			expect=$refres1
			worst=$nd1
		else
			expect=$refres2
			worst=$nd2
		fi

		log_note "Expecting refres ($refres) to match refres" \
		    "from $raid-$worst ($expect)"
		log_must test "$refres" -eq "$expect"

		log_must zfs destroy "$vol"
	done

	log_must zpool destroy "$TESTPOOL"
}
177*b73ccab0SMike Gerdts
#
# Verify that multi-vdev pools use the least optimistic size for all the
# permutations within a particular raidz variant.
#
for raid in "${!sizes[@]}"; do
	# ksh likes to create a [0] item for us.  It is not a raidz type, so
	# skip it here and in the nested loops below.
	if [[ "$raid" == "0" ]]; then
		continue
	fi

	for nd1 in "${!sizes["$raid"][@]}"; do
		if [[ "$nd1" == "0" ]]; then
			continue
		fi

		# Every ordered pair of measured widths, including a width
		# paired with itself.
		for nd2 in "${!sizes["$raid"][@]}"; do
			if [[ "$nd2" == "0" ]]; then
				continue
			fi

			check_vdevs "$raid" "$nd1" "$nd2"
		done
	done
done

log_pass "raidz refreservation=auto picks worst raidz vdev"
198