10713e23George Wilson/*
20713e23George Wilson * CDDL HEADER START
30713e23George Wilson *
40713e23George Wilson * The contents of this file are subject to the terms of the
50713e23George Wilson * Common Development and Distribution License (the "License").
60713e23George Wilson * You may not use this file except in compliance with the License.
70713e23George Wilson *
80713e23George Wilson * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90713e23George Wilson * or http://www.opensolaris.org/os/licensing.
100713e23George Wilson * See the License for the specific language governing permissions
110713e23George Wilson * and limitations under the License.
120713e23George Wilson *
130713e23George Wilson * When distributing Covered Code, include this CDDL HEADER in each
140713e23George Wilson * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150713e23George Wilson * If applicable, add the following below this CDDL HEADER, with the
160713e23George Wilson * fields enclosed by brackets "[]" replaced with your own identifying
170713e23George Wilson * information: Portions Copyright [yyyy] [name of copyright owner]
180713e23George Wilson *
190713e23George Wilson * CDDL HEADER END
200713e23George Wilson */
210713e23George Wilson/*
220713e23George Wilson * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230713e23George Wilson * Use is subject to license terms.
240713e23George Wilson */
250713e23George Wilson
260713e23George Wilson/*
27814dcd4Serapheim Dimitropoulos * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
280713e23George Wilson */
290713e23George Wilson
300713e23George Wilson#ifndef _SYS_RANGE_TREE_H
310713e23George Wilson#define	_SYS_RANGE_TREE_H
320713e23George Wilson
334d7988dPaul Dagnelie#include <sys/btree.h>
340713e23George Wilson#include <sys/dmu.h>
350713e23George Wilson
360713e23George Wilson#ifdef	__cplusplus
370713e23George Wilsonextern "C" {
380713e23George Wilson#endif
390713e23George Wilson
400713e23George Wilson#define	RANGE_TREE_HISTOGRAM_SIZE	64
410713e23George Wilson
420713e23George Wilsontypedef struct range_tree_ops range_tree_ops_t;
430713e23George Wilson
444d7988dPaul Dagnelietypedef enum range_seg_type {
454d7988dPaul Dagnelie	RANGE_SEG32,
464d7988dPaul Dagnelie	RANGE_SEG64,
474d7988dPaul Dagnelie	RANGE_SEG_GAP,
484d7988dPaul Dagnelie	RANGE_SEG_NUM_TYPES,
494d7988dPaul Dagnelie} range_seg_type_t;
504d7988dPaul Dagnelie
515cabbc6Prashanth Sreenivasa/*
525cabbc6Prashanth Sreenivasa * Note: the range_tree may not be accessed concurrently; consumers
535cabbc6Prashanth Sreenivasa * must provide external locking if required.
545cabbc6Prashanth Sreenivasa */
550713e23George Wilsontypedef struct range_tree {
564d7988dPaul Dagnelie	zfs_btree_t	rt_root;	/* offset-ordered segment b-tree */
570713e23George Wilson	uint64_t	rt_space;	/* sum of all segments in the map */
584d7988dPaul Dagnelie	range_seg_type_t rt_type;	/* type of range_seg_t in use */
594d7988dPaul Dagnelie	/*
604d7988dPaul Dagnelie	 * All data that is stored in the range tree must have a start higher
614d7988dPaul Dagnelie	 * than or equal to rt_start, and all sizes and offsets must be
624d7988dPaul Dagnelie	 * multiples of 1 << rt_shift.
634d7988dPaul Dagnelie	 */
644d7988dPaul Dagnelie	uint8_t		rt_shift;
654d7988dPaul Dagnelie	uint64_t	rt_start;
660713e23George Wilson	range_tree_ops_t *rt_ops;
674d7988dPaul Dagnelie
684d7988dPaul Dagnelie	/* rt_btree_compare should only be set if rt_arg is a b-tree */
690713e23George Wilson	void		*rt_arg;
704d7988dPaul Dagnelie	int (*rt_btree_compare)(const void *, const void *);
710713e23George Wilson
724d7988dPaul Dagnelie	uint64_t	rt_gap;		/* allowable inter-segment gap */
73a3874b8Toomas Soome
740713e23George Wilson	/*
750713e23George Wilson	 * The rt_histogram maintains a histogram of ranges. Each bucket,
760713e23George Wilson	 * rt_histogram[i], contains the number of ranges whose size is:
770713e23George Wilson	 * 2^i <= size of range in bytes < 2^(i+1)
780713e23George Wilson	 */
790713e23George Wilson	uint64_t	rt_histogram[RANGE_TREE_HISTOGRAM_SIZE];
800713e23George Wilson} range_tree_t;
810713e23George Wilson
824d7988dPaul Dagnelietypedef struct range_seg32 {
834d7988dPaul Dagnelie	uint32_t	rs_start;	/* starting offset of this segment */
844d7988dPaul Dagnelie	uint32_t	rs_end;		/* ending offset (non-inclusive) */
854d7988dPaul Dagnelie} range_seg32_t;
864d7988dPaul Dagnelie
874d7988dPaul Dagnelie/*
884d7988dPaul Dagnelie * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may
894d7988dPaul Dagnelie * require 64-bit integers for ranges.
904d7988dPaul Dagnelie */
914d7988dPaul Dagnelietypedef struct range_seg64 {
924d7988dPaul Dagnelie	uint64_t	rs_start;	/* starting offset of this segment */
934d7988dPaul Dagnelie	uint64_t	rs_end;		/* ending offset (non-inclusive) */
944d7988dPaul Dagnelie} range_seg64_t;
954d7988dPaul Dagnelie
964d7988dPaul Dagnelietypedef struct range_seg_gap {
970713e23George Wilson	uint64_t	rs_start;	/* starting offset of this segment */
980713e23George Wilson	uint64_t	rs_end;		/* ending offset (non-inclusive) */
99a3874b8Toomas Soome	uint64_t	rs_fill;	/* actual fill if gap mode is on */
1004d7988dPaul Dagnelie} range_seg_gap_t;
1014d7988dPaul Dagnelie
1024d7988dPaul Dagnelie/*
1034d7988dPaul Dagnelie * This type needs to be the largest of the range segs, since it will be stack
1044d7988dPaul Dagnelie * allocated and then cast the actual type to do tree operations.
1054d7988dPaul Dagnelie */
1064d7988dPaul Dagnelietypedef range_seg_gap_t range_seg_max_t;
1074d7988dPaul Dagnelie
1084d7988dPaul Dagnelie/*
1094d7988dPaul Dagnelie * This is just for clarity of code purposes, so we can make it clear that a
1104d7988dPaul Dagnelie * pointer is to a range seg of some type; when we need to do the actual math,
1114d7988dPaul Dagnelie * we'll figure out the real type.
1124d7988dPaul Dagnelie */
1134d7988dPaul Dagnelietypedef void range_seg_t;
1140713e23George Wilson
1150713e23George Wilsonstruct range_tree_ops {
1160713e23George Wilson	void    (*rtop_create)(range_tree_t *rt, void *arg);
1170713e23George Wilson	void    (*rtop_destroy)(range_tree_t *rt, void *arg);
1184d7988dPaul Dagnelie	void	(*rtop_add)(range_tree_t *rt, void *rs, void *arg);
1194d7988dPaul Dagnelie	void    (*rtop_remove)(range_tree_t *rt, void *rs, void *arg);
1200713e23George Wilson	void	(*rtop_vacate)(range_tree_t *rt, void *arg);
1210713e23George Wilson};
1220713e23George Wilson
1234d7988dPaul Dagneliestatic inline uint64_t
1244d7988dPaul Dagneliers_get_start_raw(const range_seg_t *rs, const range_tree_t *rt)
1254d7988dPaul Dagnelie{
1264d7988dPaul Dagnelie	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
1274d7988dPaul Dagnelie	switch (rt->rt_type) {
1284d7988dPaul Dagnelie	case RANGE_SEG32:
129028b5dfPaul Dagnelie		return (((const range_seg32_t *)rs)->rs_start);
1304d7988dPaul Dagnelie	case RANGE_SEG64:
131028b5dfPaul Dagnelie		return (((const range_seg64_t *)rs)->rs_start);
1324d7988dPaul Dagnelie	case RANGE_SEG_GAP:
133028b5dfPaul Dagnelie		return (((const range_seg_gap_t *)rs)->rs_start);
1344d7988dPaul Dagnelie	default:
1354d7988dPaul Dagnelie		VERIFY(0);
1364d7988dPaul Dagnelie		return (0);
1374d7988dPaul Dagnelie	}
1384d7988dPaul Dagnelie}
1394d7988dPaul Dagnelie
1404d7988dPaul Dagneliestatic inline uint64_t
1414d7988dPaul Dagneliers_get_end_raw(const range_seg_t *rs, const range_tree_t *rt)
1424d7988dPaul Dagnelie{
1434d7988dPaul Dagnelie	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
1444d7988dPaul Dagnelie	switch (rt->rt_type) {
1454d7988dPaul Dagnelie	case RANGE_SEG32:
146028b5dfPaul Dagnelie		return (((const range_seg32_t *)rs)->rs_end);
1474d7988dPaul Dagnelie	case RANGE_SEG64:
148028b5dfPaul Dagnelie		return (((const range_seg64_t *)rs)->rs_end);
1494d7988dPaul Dagnelie	case RANGE_SEG_GAP:
150028b5dfPaul Dagnelie		return (((const range_seg_gap_t *)rs)->rs_end);
1514d7988dPaul Dagnelie	default:
1524d7988dPaul Dagnelie		VERIFY(0);
1534d7988dPaul Dagnelie		return (0);
1544d7988dPaul Dagnelie	}
1554d7988dPaul Dagnelie}
1564d7988dPaul Dagnelie
1574d7988dPaul Dagneliestatic inline uint64_t
1584d7988dPaul Dagneliers_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt)
1594d7988dPaul Dagnelie{
1604d7988dPaul Dagnelie	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
1614d7988dPaul Dagnelie	switch (rt->rt_type) {
1624d7988dPaul Dagnelie	case RANGE_SEG32: {
1634d7988dPaul Dagnelie		const range_seg32_t *r32 = rs;
1644d7988dPaul Dagnelie		return (r32->rs_end - r32->rs_start);
1654d7988dPaul Dagnelie	}
1664d7988dPaul Dagnelie	case RANGE_SEG64: {
1674d7988dPaul Dagnelie		const range_seg64_t *r64 = rs;
1684d7988dPaul Dagnelie		return (r64->rs_end - r64->rs_start);
1694d7988dPaul Dagnelie	}
1704d7988dPaul Dagnelie	case RANGE_SEG_GAP:
1714d7988dPaul Dagnelie		return (((range_seg_gap_t *)rs)->rs_fill);
1724d7988dPaul Dagnelie	default:
1734d7988dPaul Dagnelie		VERIFY(0);
1744d7988dPaul Dagnelie		return (0);
1754d7988dPaul Dagnelie	}
1764d7988dPaul Dagnelie
1774d7988dPaul Dagnelie}
1784d7988dPaul Dagnelie
1794d7988dPaul Dagneliestatic inline uint64_t
1804d7988dPaul Dagneliers_get_start(const range_seg_t *rs, const range_tree_t *rt)
1814d7988dPaul Dagnelie{
1824d7988dPaul Dagnelie	return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
1834d7988dPaul Dagnelie}
1844d7988dPaul Dagnelie
1854d7988dPaul Dagneliestatic inline uint64_t
1864d7988dPaul Dagneliers_get_end(const range_seg_t *rs, const range_tree_t *rt)
1874d7988dPaul Dagnelie{
1884d7988dPaul Dagnelie	return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
1894d7988dPaul Dagnelie}
1904d7988dPaul Dagnelie
1914d7988dPaul Dagneliestatic inline uint64_t
1924d7988dPaul Dagneliers_get_fill(const range_seg_t *rs, const range_tree_t *rt)
1934d7988dPaul Dagnelie{
1944d7988dPaul Dagnelie	return (rs_get_fill_raw(rs, rt) << rt->rt_shift);
1954d7988dPaul Dagnelie}
1964d7988dPaul Dagnelie
1974d7988dPaul Dagneliestatic inline void
1984d7988dPaul Dagneliers_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start)
1994d7988dPaul Dagnelie{
2004d7988dPaul Dagnelie	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
2014d7988dPaul Dagnelie	switch (rt->rt_type) {
2024d7988dPaul Dagnelie	case RANGE_SEG32:
2034d7988dPaul Dagnelie		ASSERT3U(start, <=, UINT32_MAX);
2044d7988dPaul Dagnelie		((range_seg32_t *)rs)->rs_start = (uint32_t)start;
2054d7988dPaul Dagnelie		break;
2064d7988dPaul Dagnelie	case RANGE_SEG64:
2074d7988dPaul Dagnelie		((range_seg64_t *)rs)->rs_start = start;
2084d7988dPaul Dagnelie		break;
2094d7988dPaul Dagnelie	case RANGE_SEG_GAP:
2104d7988dPaul Dagnelie		((range_seg_gap_t *)rs)->rs_start = start;
2114d7988dPaul Dagnelie		break;
2124d7988dPaul Dagnelie	default:
2134d7988dPaul Dagnelie		VERIFY(0);
2144d7988dPaul Dagnelie	}
2154d7988dPaul Dagnelie}
2164d7988dPaul Dagnelie
2174d7988dPaul Dagneliestatic inline void
2184d7988dPaul Dagneliers_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end)
2194d7988dPaul Dagnelie{
2204d7988dPaul Dagnelie	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
2214d7988dPaul Dagnelie	switch (rt->rt_type) {
2224d7988dPaul Dagnelie	case RANGE_SEG32:
2234d7988dPaul Dagnelie		ASSERT3U(end, <=, UINT32_MAX);
2244d7988dPaul Dagnelie		((range_seg32_t *)rs)->rs_end = (uint32_t)end;
2254d7988dPaul Dagnelie		break;
2264d7988dPaul Dagnelie	case RANGE_SEG64:
2274d7988dPaul Dagnelie		((range_seg64_t *)rs)->rs_end = end;
2284d7988dPaul Dagnelie		break;
2294d7988dPaul Dagnelie	case RANGE_SEG_GAP:
2304d7988dPaul Dagnelie		((range_seg_gap_t *)rs)->rs_end = end;
2314d7988dPaul Dagnelie		break;
2324d7988dPaul Dagnelie	default:
2334d7988dPaul Dagnelie		VERIFY(0);
2344d7988dPaul Dagnelie	}
2354d7988dPaul Dagnelie}
2364d7988dPaul Dagnelie
2374d7988dPaul Dagneliestatic inline void
2384d7988dPaul Dagneliers_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
2394d7988dPaul Dagnelie{
2404d7988dPaul Dagnelie	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
2414d7988dPaul Dagnelie	switch (rt->rt_type) {
2424d7988dPaul Dagnelie	case RANGE_SEG32:
2434d7988dPaul Dagnelie		/* fall through */
2444d7988dPaul Dagnelie	case RANGE_SEG64:
2454d7988dPaul Dagnelie		ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs,
2464d7988dPaul Dagnelie		    rt));
2474d7988dPaul Dagnelie		break;
2484d7988dPaul Dagnelie	case RANGE_SEG_GAP:
2494d7988dPaul Dagnelie		((range_seg_gap_t *)rs)->rs_fill = fill;
2504d7988dPaul Dagnelie		break;
2514d7988dPaul Dagnelie	default:
2524d7988dPaul Dagnelie		VERIFY(0);
2534d7988dPaul Dagnelie	}
2544d7988dPaul Dagnelie}
2554d7988dPaul Dagnelie
2564d7988dPaul Dagneliestatic inline void
2574d7988dPaul Dagneliers_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start)
2584d7988dPaul Dagnelie{
2594d7988dPaul Dagnelie	ASSERT3U(start, >=, rt->rt_start);
2604d7988dPaul Dagnelie	ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift));
2614d7988dPaul Dagnelie	rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift);
2624d7988dPaul Dagnelie}
2634d7988dPaul Dagnelie
2644d7988dPaul Dagneliestatic inline void
2654d7988dPaul Dagneliers_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end)
2664d7988dPaul Dagnelie{
2674d7988dPaul Dagnelie	ASSERT3U(end, >=, rt->rt_start);
2684d7988dPaul Dagnelie	ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift));
2694d7988dPaul Dagnelie	rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift);
2704d7988dPaul Dagnelie}
2714d7988dPaul Dagnelie
2724d7988dPaul Dagneliestatic inline void
2734d7988dPaul Dagneliers_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
2744d7988dPaul Dagnelie{
2754d7988dPaul Dagnelie	ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift));
2764d7988dPaul Dagnelie	rs_set_fill_raw(rs, rt, fill >> rt->rt_shift);
2774d7988dPaul Dagnelie}
2784d7988dPaul Dagnelie
2790713e23George Wilsontypedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
2800713e23George Wilson
2814d7988dPaul Dagnelierange_tree_t *range_tree_create_impl(range_tree_ops_t *ops,
2824d7988dPaul Dagnelie    range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
2834d7988dPaul Dagnelie    int (*zfs_btree_compare) (const void *, const void *), uint64_t gap);
2844d7988dPaul Dagnelierange_tree_t *range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
2854d7988dPaul Dagnelie    void *arg, uint64_t start, uint64_t shift);
2860713e23George Wilsonvoid range_tree_destroy(range_tree_t *rt);
2870713e23George Wilsonboolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
2884d7988dPaul Dagnelierange_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
289af1d63aPaul Dagnelieboolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
290af1d63aPaul Dagnelie    uint64_t *ostart, uint64_t *osize);
291555d674Serapheim Dimitropoulosvoid range_tree_verify_not_present(range_tree_t *rt,
292555d674Serapheim Dimitropoulos    uint64_t start, uint64_t size);
293a3874b8Toomas Soomevoid range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
294a3874b8Toomas Soome    uint64_t newstart, uint64_t newsize);
2950713e23George Wilsonuint64_t range_tree_space(range_tree_t *rt);
296814dcd4Serapheim Dimitropoulosuint64_t range_tree_numsegs(range_tree_t *rt);
2978671400Serapheim Dimitropoulosboolean_t range_tree_is_empty(range_tree_t *rt);
2980713e23George Wilsonvoid range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst);
2990713e23George Wilsonvoid range_tree_stat_verify(range_tree_t *rt);
300cfd63e1Matthew Ahrensuint64_t range_tree_min(range_tree_t *rt);
301cfd63e1Matthew Ahrensuint64_t range_tree_max(range_tree_t *rt);
302cfd63e1Matthew Ahrensuint64_t range_tree_span(range_tree_t *rt);
3030713e23George Wilson
3040713e23George Wilsonvoid range_tree_add(void *arg, uint64_t start, uint64_t size);
3050713e23George Wilsonvoid range_tree_remove(void *arg, uint64_t start, uint64_t size);
306a3874b8Toomas Soomevoid range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size);
307a3874b8Toomas Soomevoid range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta);
308bf16b11Matthew Ahrensvoid range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size);
3090713e23George Wilson
3100713e23George Wilsonvoid range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
3110713e23George Wilsonvoid range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
312a3874b8Toomas Soomerange_seg_t *range_tree_first(range_tree_t *rt);
313a3874b8Toomas Soome
314814dcd4Serapheim Dimitropoulosvoid range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
315814dcd4Serapheim Dimitropoulos    range_tree_t *removefrom, range_tree_t *addto);
316814dcd4Serapheim Dimitropoulosvoid range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
317814dcd4Serapheim Dimitropoulos    range_tree_t *addto);
318814dcd4Serapheim Dimitropoulos
3194d7988dPaul Dagnelievoid rt_btree_create(range_tree_t *rt, void *arg);
3204d7988dPaul Dagnelievoid rt_btree_destroy(range_tree_t *rt, void *arg);
3214d7988dPaul Dagnelievoid rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg);
3224d7988dPaul Dagnelievoid rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg);
3234d7988dPaul Dagnelievoid rt_btree_vacate(range_tree_t *rt, void *arg);
3244d7988dPaul Dagnelieextern range_tree_ops_t rt_btree_ops;
3250713e23George Wilson
3260713e23George Wilson#ifdef	__cplusplus
3270713e23George Wilson}
3280713e23George Wilson#endif
3290713e23George Wilson
3300713e23George Wilson#endif	/* _SYS_RANGE_TREE_H */
331