/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2014 by Delphix. All rights reserved. * Copyright 2018 Joyent, Inc. */ #ifndef _VM_HAT_I86_H #define _VM_HAT_I86_H #ifdef __cplusplus extern "C" { #endif /* * VM - Hardware Address Translation management. * * This file describes the contents of the x86_64 HAT data structures. */ #include #include #include #include #include #include #include #include #include #include #include #include /* * The essential data types involved: * * htable_t - There is one of these for each page table and it is used * by the HAT to manage the page table. * * hment_t - Links together multiple PTEs to a single page. */ /* * Maximum number of per-CPU pagetable entries that we'll need to cache in the * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more * information. */ #if defined(__xpv) /* * The Xen hypervisor does not use per-CPU pagetables (PCP). Define a single * struct member for it at least to make life easier and not make the member * conditional. */ #define MAX_COPIED_PTES 1 #else /* * The 64-bit kernel may have up to 512 PTEs present in it for a given process. */ #define MAX_COPIED_PTES 512 #endif /* __xpv */ #define TOP_LEVEL(h) (((h)->hat_max_level)) /* * The hat struct exists for each address space. */ struct hat { kmutex_t hat_mutex; struct as *hat_as; uint_t hat_stats; pgcnt_t hat_pages_mapped[MAX_PAGE_LEVEL + 1]; pgcnt_t hat_ism_pgcnt; cpuset_t hat_cpus; uint16_t hat_flags; uint8_t hat_max_level; /* top level of this HAT */ uint_t hat_num_copied; /* Actual num of hat_copied_ptes[] */ htable_t *hat_htable; /* top level htable */ struct hat *hat_next; struct hat *hat_prev; uint_t hat_num_hash; /* number of htable hash buckets */ htable_t **hat_ht_hash; /* htable hash buckets */ htable_t *hat_ht_cached; /* cached free htables */ x86pte_t hat_copied_ptes[MAX_COPIED_PTES]; #if defined(__amd64) && defined(__xpv) pfn_t hat_user_ptable; /* alt top ptable for user mode */ #endif }; typedef struct hat hat_t; #define PGCNT_INC(hat, level) \ atomic_inc_ulong(&(hat)->hat_pages_mapped[level]); #define PGCNT_DEC(hat, level) \ atomic_dec_ulong(&(hat)->hat_pages_mapped[level]); /* * Flags for the hat_flags field. For more information, please see the big * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c. * * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that * demap()s can be avoided. * * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the * big comment in hat_i86.c for a description. * * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process. * * HAT_VICTIM - This is set while a hat is being examined for page table * stealing and prevents it from being freed. * * HAT_SHARED - The hat has exported it's page tables via hat_share() * * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned. * * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU, * not a process). */ #define HAT_FREEING (0x0001) #define HAT_VICTIM (0x0002) #define HAT_SHARED (0x0004) #define HAT_PINNED (0x0008) #define HAT_COPIED (0x0010) #define HAT_COPIED_32 (0x0020) #define HAT_PCP (0x0040) /* * Additional platform attribute for hat_devload() to force no caching. */ #define HAT_PLAT_NOCACHE (0x100000) /* * Simple statistics for the HAT. These are just counters that are * atomically incremented. They can be reset directly from the kernel * debugger. */ struct hatstats { ulong_t hs_reap_attempts; ulong_t hs_reaped; ulong_t hs_steals; ulong_t hs_ptable_allocs; ulong_t hs_ptable_frees; ulong_t hs_htable_rgets; /* allocs from reserve */ ulong_t hs_htable_rputs; /* putbacks to reserve */ ulong_t hs_htable_shared; /* number of htables shared */ ulong_t hs_htable_unshared; /* number of htables unshared */ ulong_t hs_hm_alloc; ulong_t hs_hm_free; ulong_t hs_hm_put_reserve; ulong_t hs_hm_get_reserve; ulong_t hs_hm_steals; ulong_t hs_hm_steal_exam; ulong_t hs_tlb_inval_delayed; ulong_t hs_hat_copied64; ulong_t hs_hat_copied32; ulong_t hs_hat_normal64; }; extern struct hatstats hatstat; #ifdef DEBUG #define HATSTAT_INC(x) (++hatstat.x) #else #define HATSTAT_INC(x) (0) #endif #if defined(_KERNEL) /* * Useful macro to align hat_XXX() address arguments to a page boundary */ #define ALIGN2PAGE(a) ((uintptr_t)(a) & MMU_PAGEMASK) #define IS_PAGEALIGNED(a) (((uintptr_t)(a) & MMU_PAGEOFFSET) == 0) extern uint_t khat_running; /* set at end of hat_kern_setup() */ extern cpuset_t khat_cpuset; /* cpuset for kernal address demap Xcalls */ extern kmutex_t hat_list_lock; extern kcondvar_t hat_list_cv; /* * Interfaces to setup a cpu private mapping (ie. preemption disabled). * The attr and flags arguments are the same as for hat_devload(). * setup() must be called once, then any number of calls to remap(), * followed by a final call to release() * * Used by ppcopy(), page_zero(), the memscrubber, and the kernel debugger. */ typedef paddr_t hat_mempte_t; /* phys addr of PTE */ extern hat_mempte_t hat_mempte_setup(caddr_t addr); extern void hat_mempte_remap(pfn_t, caddr_t, hat_mempte_t, uint_t attr, uint_t flags); extern void hat_mempte_release(caddr_t addr, hat_mempte_t); /* * Interfaces to manage which thread has access to htable and hment reserves. * The USE_HAT_RESERVES macro should always be recomputed in full. Its value * (due to curthread) can change after any call into kmem/vmem. */ extern uint_t can_steal_post_boot; extern uint_t use_boot_reserve; #define USE_HAT_RESERVES() \ (use_boot_reserve || curthread->t_hatdepth > 1 || \ panicstr != NULL || vmem_is_populator()) /* * initialization stuff needed by by startup, mp_startup... */ extern void hat_cpu_online(struct cpu *); extern void hat_cpu_offline(struct cpu *); extern void setup_vaddr_for_ppcopy(struct cpu *); extern void teardown_vaddr_for_ppcopy(struct cpu *); extern void clear_boot_mappings(uintptr_t, uintptr_t); /* * magic value to indicate that all TLB entries should be demapped. */ #define DEMAP_ALL_ADDR (~(uintptr_t)0) /* * not in any include file??? */ extern void halt(char *fmt); /* * x86 specific routines for use online in setup or i86pc/vm files */ extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size, caddr_t ekernelheap); extern void hat_kern_setup(void); extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags, x86pte_t old_pte, void *pte_ptr, boolean_t tlb); extern void hat_init_finish(void); extern caddr_t hat_kpm_pfn2va(pfn_t pfn); extern pfn_t hat_kpm_va2pfn(caddr_t); extern page_t *hat_kpm_vaddr2page(caddr_t); extern uintptr_t hat_kernelbase(uintptr_t); extern void hat_kmap_init(uintptr_t base, size_t len); extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry); extern void mmu_calc_user_slots(void); extern void hat_tlb_inval(struct hat *hat, uintptr_t va); extern void hat_switch(struct hat *hat); #define TLB_RANGE_LEN(r) ((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level)) /* * A range of virtual pages for purposes of demapping. */ typedef struct tlb_range { uintptr_t tr_va; /* address of page */ ulong_t tr_cnt; /* number of pages in range */ int8_t tr_level; /* page table level */ } tlb_range_t; #if defined(__xpv) #define XPV_DISALLOW_MIGRATE() xen_block_migrate() #define XPV_ALLOW_MIGRATE() xen_allow_migrate() #define mmu_flush_tlb_page(va) mmu_invlpg((caddr_t)va) #define mmu_flush_tlb_kpage(va) mmu_invlpg((caddr_t)va) /* * Interfaces to use around code that maps/unmaps grant table references. */ extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *); extern void hat_release_mapping(hat_t *, caddr_t); #else #define XPV_DISALLOW_MIGRATE() /* nothing */ #define XPV_ALLOW_MIGRATE() /* nothing */ #define pfn_is_foreign(pfn) __lintzero typedef enum flush_tlb_type { FLUSH_TLB_ALL = 1, FLUSH_TLB_NONGLOBAL = 2, FLUSH_TLB_RANGE = 3, } flush_tlb_type_t; extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *); extern void mmu_flush_tlb_kpage(uintptr_t); extern void mmu_flush_tlb_page(uintptr_t); extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs); /* * routines to deal with delayed TLB invalidations for idle CPUs */ extern void tlb_going_idle(void); extern void tlb_service(void); #endif /* !__xpv */ #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _VM_HAT_I86_H */