#!/usr/bin/ksh -p
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright 2019 Joyent, Inc.
#

#
# Source the shared test-suite library and the snapshot test configuration.
# The helpers used below (log_must, log_onexit, log_pass, verify_runnable)
# and the TESTPOOL/TESTFS/TESTDIR variables are not defined in this file;
# presumably they come from these two includes.
#
. "$STF_SUITE/include/libtest.shlib"
. "$STF_SUITE/tests/functional/snapshot/snapshot.cfg"

#
# DESCRIPTION:
#
# This test ensures that the following race condition does not
# take place:
#
# 1] A sync thread inserts a new entry in the deadlist of a
#    snapshot. The dle_bpobj at that entry is currently the
#    empty bpobj (our sentinel), so we close it and are
#    about to reopen it (see dle_enqueue()).
#
# 2] At the same time, a thread executing an administrative
#    command that uses dsl_deadlist_space_range() is about
#    to dereference that same bpobj, which was just closed
#    and is therefore NULL.
#
# 3] The sync thread loses the race and we dereference the
#    NULL pointer in the kernel.
#
# STRATEGY:
#
# 1. Set up a dataset and create a bunch of test files. Take a
#    snapshot right after each new test file is created.
# 2. Start DTrace in the background to put a delay in the
#    sync thread after it closes the empty bpobj and before
#    it reopens it. The dtrace process is set to exit when this
#    script exits.
# 3. Start a process in the background that runs zfs-destroy
#    dry-runs in an infinite loop. The idea is to keep calling
#    dsl_deadlist_space_range().
# 4. Go ahead and start removing the test files. This should
#    start populating the deadlist of each snapshot with
#    entries and go through the dle_enqueue() target code.
# 5. Kill the 'zfs destroy' loop and clean up the dataset.
#

# NOTE(review): verify_runnable is not defined in this file; "both"
# presumably selects the zones this test may run in -- confirm against
# the test library.
verify_runnable "both"


# Name of the scratch child dataset; it is created under $TESTPOOL/$TESTFS
# by setup() and destroyed by cleanup().
DLDS="dl_race"

#
# Test teardown, registered via log_onexit.
#
# Kills the background 'zfs destroy -nv' loop (if it was ever started) and
# then recursively destroys the $DLDS dataset together with its snapshots.
#
# Globals read: DLOOP_PID, TESTPOOL, TESTFS, DLDS
#
function cleanup
{
	#
	# DLOOP_PID is only assigned once the background loop has been
	# launched. If the test aborts earlier (e.g. during setup) there is
	# nothing to kill; skipping the kill avoids running 'kill -9' with no
	# PID, which would fail before the dataset below is destroyed.
	#
	if [[ -n "$DLOOP_PID" ]]; then
		log_must kill -9 "$DLOOP_PID"
	fi

	log_must zfs destroy -fR "$TESTPOOL/$TESTFS/$DLDS"
}

#
# Create the $DLDS dataset and populate it with 50 files of size 1m
# (dl_test_file1 .. dl_test_file50), taking a snapshot (snap1 .. snap50)
# immediately after each file is created.
#
# Globals read: TESTPOOL, TESTFS, TESTDIR, DLDS
#
function setup
{
	# Keep the loop counter local so it cannot clobber a caller's "i".
	typeset i

	log_must zfs create "$TESTPOOL/$TESTFS/$DLDS"
	for i in {1..50}; do
		log_must mkfile 1m "/$TESTDIR/$DLDS/dl_test_file$i"
		log_must zfs snapshot "$TESTPOOL/$TESTFS/${DLDS}@snap${i}"
	done
}

#
# Run a dry-run destroy ('zfs destroy -nv') over the snapshot range
# snap1%snap50 of the $DLDS dataset in an endless loop. Per the test
# strategy, the point is to keep calling dsl_deadlist_space_range().
#
# This function never returns on its own; it is meant to be started in the
# background and killed by its PID.
#
# Globals read: TESTPOOL, TESTFS, DLDS
#
function destroy_nv_loop
{
	while true; do
		log_must zfs destroy -nv \
		    "$TESTPOOL/$TESTFS/${DLDS}@snap1%snap50"
	done
}

# Register teardown first so the dataset is removed even if setup fails.
log_onexit cleanup

setup
log_must sync

#
# Start DTrace in the background: chill() stalls every entry into
# bpobj_decr_empty() (500000000, in chill()'s nanosecond units) to widen
# the race window described above. -w is needed because chill() is a
# destructive action; -q suppresses the default probe output.
#
# NOTE(review): -p is given "$PPID" (the parent of this shell), so dtrace's
# lifetime is tied to that process, whereas the strategy comment says it
# exits with "this script" -- confirm this is the intended process.
#
log_must dtrace -p "$PPID" -qwn "fbt::bpobj_decr_empty:entry { chill(500000000); }" &
sleep 1

# Start the dry-run destroy loop and remember its PID for cleanup().
destroy_nv_loop &
DLOOP_PID="$!"
sleep 1

# Removing the files populates the deadlist of each snapshot (strategy step 4).
for i in {1..50}; do
	log_must rm "/$TESTDIR/$DLDS/dl_test_file$i"
done
log_must sync

log_pass "There should be no race condition when an administrative command" \
    " attempts to read a deadlist's entries while a sync" \
    " thread is manipulating it."
