/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include /* global variable for static default limit for non-IRM drivers */ extern int ddi_msix_alloc_limit; /* Extern declarations */ extern int (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *, psm_intr_op_t, int *); /* * Global variables for IRM pool configuration: * * (1) apix_system_max_vectors -- this would limit the maximum * number of interrupt vectors that will be made available * to the device drivers. The default value (-1) indicates * that all the available vectors could be used. * * (2) apix_irm_cpu_factor -- This would specify the number of CPUs that * should be excluded from the global IRM pool of interrupt vectors. * By default this would be zero, so vectors from all the CPUs * present will be factored into the IRM pool. * * (3) apix_irm_reserve_fixed_vectors -- This would specify the number * of vectors that should be reserved for FIXED type interrupts and * exclude them from the IRM pool. 
The value can be one of the * following: * 0 - no reservation (default) * - a positive number for the reserved cache * -1 - reserve the maximum needed * * (4) apix_irm_free_fixed_vectors -- This flag specifies if the * vectors for FIXED type should be freed and added back * to the IRM pool when ddi_intr_free() is called. The default * is to add it back to the pool. */ int apix_system_max_vectors = -1; int apix_irm_cpu_factor = 0; int apix_irm_reserve_fixed_vectors = 0; int apix_irm_free_fixed_vector = 1; /* info from APIX module for IRM configuration */ apix_irm_info_t apix_irminfo; kmutex_t apix_irm_lock; /* global mutex for apix_irm_* data */ ddi_irm_params_t apix_irm_params; /* IRM pool info */ int apix_irm_cache_size = 0; /* local cache for FIXED type requests */ int apix_irm_cpu_factor_available = 0; int apix_irm_max_cpus = 0; int apix_irm_cpus_used = 0; int apix_irm_fixed_intr_vectors_used; extern int ncpus; /* local data/functions */ static int apix_irm_chk_apix(); int apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle, psm_intr_op_t op, int *result); int apix_irm_disable_intr(processorid_t); void apix_irm_enable_intr(processorid_t); int (*psm_intr_ops_saved)(dev_info_t *dip, ddi_intr_handle_impl_t *handle, psm_intr_op_t op, int *result) = NULL; int (*psm_disable_intr_saved)(processorid_t) = NULL; void (*psm_enable_intr_saved)(processorid_t) = NULL; int apix_irm_alloc_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *); int apix_irm_free_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *); /* * Initilaize IRM pool for APIC interrupts if the PSM module * is of APIX type. This should be called only after PSM module * is loaded and APIC interrupt system is initialized. */ void apix_irm_init(void) { dev_info_t *dip; int total_avail_vectors; int cpus_used; int cache_size; /* nothing to do if IRM is disabled */ if (!irm_enable) return; /* * Use root devinfo node to associate the IRM pool with it * as the pool is global to the system. 
*/ dip = ddi_root_node(); /* * Check if PSM module is initialized and it is APIX * module (which supports IRM functionality). */ if ((psm_intr_ops == NULL) || !apix_irm_chk_apix()) { /* not an APIX module */ APIX_IRM_DEBUG((CE_CONT, "apix_irm_init: APIX module not present")); return; } /* * Now, determine the IRM pool parameters based on the * info from APIX module and global config variables. */ /* * apix_ncpus shows all the CPUs present in the * system but not all of them may have been enabled * (i.e. mp_startup() may not have been called yet). * So, use ncpus for IRM pool creation. */ if (apix_irminfo.apix_ncpus > ncpus) apix_irminfo.apix_ncpus = ncpus; /* apply the CPU factor if possible */ if ((apix_irm_cpu_factor > 0) && (apix_irminfo.apix_ncpus > apix_irm_cpu_factor)) { cpus_used = apix_irminfo.apix_ncpus - apix_irm_cpu_factor; apix_irm_cpu_factor_available = apix_irm_cpu_factor; } else { cpus_used = apix_irminfo.apix_ncpus; } apix_irm_cpus_used = apix_irm_max_cpus = cpus_used; APIX_IRM_DEBUG((CE_CONT, "apix_irm_init: %d CPUs used for IRM pool size", cpus_used)); total_avail_vectors = cpus_used * apix_irminfo.apix_per_cpu_vectors - apix_irminfo.apix_vectors_allocated; apix_irm_fixed_intr_vectors_used = apix_irminfo.apix_vectors_allocated; if (total_avail_vectors <= 0) { /* can not determine pool size */ APIX_IRM_DEBUG((CE_NOTE, "apix_irm_init: can not determine pool size")); return; } /* adjust the pool size as per the global config variable */ if ((apix_system_max_vectors > 0) && (apix_system_max_vectors < total_avail_vectors)) total_avail_vectors = apix_system_max_vectors; /* pre-reserve vectors (i.e. 
local cache) for FIXED type if needed */ if (apix_irm_reserve_fixed_vectors != 0) { cache_size = apix_irm_reserve_fixed_vectors; if ((cache_size == -1) || (cache_size > apix_irminfo.apix_ioapic_max_vectors)) cache_size = apix_irminfo.apix_ioapic_max_vectors; total_avail_vectors -= cache_size; apix_irm_cache_size = cache_size; } if (total_avail_vectors <= 0) { APIX_IRM_DEBUG((CE_NOTE, "apix_irm_init: invalid config parameters!")); return; } /* IRM pool is used only for MSI/X interrupts */ apix_irm_params.iparams_types = DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX; apix_irm_params.iparams_total = total_avail_vectors; if (ndi_irm_create(dip, &apix_irm_params, &apix_irm_pool_p) == NDI_SUCCESS) { /* * re-direct psm_intr_ops to intercept FIXED * interrupt allocation requests. */ psm_intr_ops_saved = psm_intr_ops; psm_intr_ops = apix_irm_intr_ops; /* * re-direct psm_enable_intr()/psm_disable_intr() to * intercept CPU offline/online requests. */ psm_disable_intr_saved = psm_disable_intr; psm_enable_intr_saved = psm_enable_intr; psm_enable_intr = apix_irm_enable_intr; psm_disable_intr = apix_irm_disable_intr; mutex_init(&apix_irm_lock, NULL, MUTEX_DRIVER, NULL); /* * Set default alloc limit for non-IRM drivers * to DDI_MIN_MSIX_ALLOC (currently defined as 8). * * NOTE: This is done here so that the limit of 8 vectors * is applicable only with APIX module. For the old pcplusmp * implementation, the current default of 2 (i.e * DDI_DEFAULT_MSIX_ALLOC) is retained. */ if (ddi_msix_alloc_limit < DDI_MIN_MSIX_ALLOC) ddi_msix_alloc_limit = DDI_MIN_MSIX_ALLOC; } else { APIX_IRM_DEBUG((CE_NOTE, "apix_irm_init: ndi_irm_create() failed")); apix_irm_pool_p = NULL; } } /* * Check if the PSM module is "APIX" type which supports IRM feature. * Returns 0 if it is not an APIX module. 
*/ static int apix_irm_chk_apix(void) { ddi_intr_handle_impl_t info_hdl; apic_get_type_t type_info; if (!psm_intr_ops) return (0); bzero(&info_hdl, sizeof (ddi_intr_handle_impl_t)); info_hdl.ih_private = &type_info; if (((*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) { /* unknown type; assume not an APIX module */ return (0); } if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) return (1); else return (0); } /* * This function intercepts PSM_INTR_OP_* requests to deal with * IRM pool maintainance for FIXED type interrupts. The following * commands are intercepted and the rest are simply passed back to * the original psm_intr_ops function: * PSM_INTR_OP_ALLOC_VECTORS * PSM_INTR_OP_FREE_VECTORS * Return value is either PSM_SUCCESS or PSM_FAILURE. */ int apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle, psm_intr_op_t op, int *result) { switch (op) { case PSM_INTR_OP_ALLOC_VECTORS: if (handle->ih_type == DDI_INTR_TYPE_FIXED) return (apix_irm_alloc_fixed(dip, handle, result)); else break; case PSM_INTR_OP_FREE_VECTORS: if (handle->ih_type == DDI_INTR_TYPE_FIXED) return (apix_irm_free_fixed(dip, handle, result)); else break; default: break; } /* pass the request to APIX */ return ((*psm_intr_ops_saved)(dip, handle, op, result)); } /* * Allocate a FIXED type interrupt. The procedure for this * operation is as follows: * * 1) Check if this IRQ is shared (i.e. IRQ is already mapped * and a vector has been already allocated). If so, then no * new vector is needed and simply pass the request to APIX * and return. * 2) Check the local cache pool for an available vector. If * the cache is not empty then take it from there and simply * pass the request to APIX and return. * 3) Otherwise, get a vector from the IRM pool by reducing the * pool size by 1. If it is successful then pass the * request to APIX module. Otherwise return PSM_FAILURE. 
*/ int apix_irm_alloc_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle, int *result) { int vector; uint_t new_pool_size; int ret; /* * Check if this IRQ has been mapped (i.e. shared IRQ case) * by doing PSM_INTR_OP_XLATE_VECTOR. */ ret = (*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_XLATE_VECTOR, &vector); if (ret == PSM_SUCCESS) { APIX_IRM_DEBUG((CE_CONT, "apix_irm_alloc_fixed: dip %p (%s) xlated vector 0x%x", (void *)dip, ddi_driver_name(dip), vector)); /* (1) mapping already exists; pass the request to PSM */ return ((*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_ALLOC_VECTORS, result)); } /* check the local cache for an available vector */ mutex_enter(&apix_irm_lock); if (apix_irm_cache_size) { /* cache is not empty */ --apix_irm_cache_size; apix_irm_fixed_intr_vectors_used++; mutex_exit(&apix_irm_lock); /* (2) use the vector from the local cache */ return ((*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_ALLOC_VECTORS, result)); } /* (3) get a vector from the IRM pool */ new_pool_size = apix_irm_params.iparams_total - 1; APIX_IRM_DEBUG((CE_CONT, "apix_irm_alloc_fixed: dip %p (%s) resize pool" " from %x to %x\n", (void *)dip, ddi_driver_name(dip), apix_irm_pool_p->ipool_totsz, new_pool_size)); if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) == NDI_SUCCESS) { /* update the pool size info */ apix_irm_params.iparams_total = new_pool_size; apix_irm_fixed_intr_vectors_used++; mutex_exit(&apix_irm_lock); return ((*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_ALLOC_VECTORS, result)); } mutex_exit(&apix_irm_lock); return (PSM_FAILURE); } /* * Free up the FIXED type interrupt. * * 1) If it is a shared vector then simply pass the request to * APIX and return. * 2) Otherwise, if apix_irm_free_fixed_vector is not set then add the * vector back to the IRM pool. Otherwise, keep it in the local cache. 
*/ int apix_irm_free_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle, int *result) { int shared; int ret; uint_t new_pool_size; /* check if it is a shared vector */ ret = (*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_GET_SHARED, &shared); if ((ret == PSM_SUCCESS) && (shared > 0)) { /* (1) it is a shared vector; simply pass the request */ APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: dip %p (%s) " "shared %d\n", (void *)dip, ddi_driver_name(dip), shared)); return ((*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_FREE_VECTORS, result)); } ret = (*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_FREE_VECTORS, result); if (ret == PSM_SUCCESS) { mutex_enter(&apix_irm_lock); if (apix_irm_free_fixed_vector) { /* (2) add the vector back to IRM pool */ new_pool_size = apix_irm_params.iparams_total + 1; APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: " "dip %p (%s) resize pool from %x to %x\n", (void *)dip, ddi_driver_name(dip), apix_irm_pool_p->ipool_totsz, new_pool_size)); if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) == NDI_SUCCESS) { /* update the pool size info */ apix_irm_params.iparams_total = new_pool_size; } else { cmn_err(CE_NOTE, "apix_irm_free_fixed: failed to add" " a vector to IRM pool"); } } else { /* keep the vector in the local cache */ apix_irm_cache_size += 1; } apix_irm_fixed_intr_vectors_used--; mutex_exit(&apix_irm_lock); } return (ret); } /* * Disable the CPU for interrupts. It is assumed that this is called to * offline/disable the CPU so that no interrupts are allocated on * that CPU. For IRM perspective, the interrupt vectors on this * CPU are to be excluded for any allocations. * * If APIX module is successful in migrating all the vectors * from this CPU then reduce the IRM pool size to exclude the * interrupt vectors for that CPU. 
*/ int apix_irm_disable_intr(processorid_t id) { uint_t new_pool_size; /* Interrupt disabling for Suspend/Resume */ if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND) return ((*psm_disable_intr_saved)(id)); mutex_enter(&apix_irm_lock); /* * Don't remove the CPU from the IRM pool if we have CPU factor * available. */ if ((apix_irm_cpu_factor > 0) && (apix_irm_cpu_factor_available > 0)) { apix_irm_cpu_factor_available--; } else { /* can't disable if there is only one CPU used */ if (apix_irm_cpus_used == 1) { mutex_exit(&apix_irm_lock); return (PSM_FAILURE); } /* Calculate the new size for the IRM pool */ new_pool_size = apix_irm_params.iparams_total - apix_irminfo.apix_per_cpu_vectors; /* Apply the max. limit */ if (apix_system_max_vectors > 0) { uint_t max; max = apix_system_max_vectors - apix_irm_fixed_intr_vectors_used - apix_irm_cache_size; new_pool_size = MIN(new_pool_size, max); } if (new_pool_size == 0) { cmn_err(CE_WARN, "Invalid pool size 0 with " "apix_system_max_vectors = %d", apix_system_max_vectors); mutex_exit(&apix_irm_lock); return (PSM_FAILURE); } if (new_pool_size != apix_irm_params.iparams_total) { /* remove the CPU from the IRM pool */ if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) != NDI_SUCCESS) { mutex_exit(&apix_irm_lock); APIX_IRM_DEBUG((CE_NOTE, "apix_irm_disable_intr: failed to resize" " the IRM pool")); return (PSM_FAILURE); } /* update the pool size info */ apix_irm_params.iparams_total = new_pool_size; } /* decrement the CPU count used by IRM pool */ apix_irm_cpus_used--; } /* * Now, disable the CPU for interrupts. */ if ((*psm_disable_intr_saved)(id) != PSM_SUCCESS) { APIX_IRM_DEBUG((CE_NOTE, "apix_irm_disable_intr: failed to disable CPU interrupts" " for CPU#%d", id)); mutex_exit(&apix_irm_lock); return (PSM_FAILURE); } /* decrement the CPU count enabled for interrupts */ apix_irm_max_cpus--; mutex_exit(&apix_irm_lock); return (PSM_SUCCESS); } /* * Enable the CPU for interrupts. 
It is assumed that this function is
 * called to enable/online the CPU so that interrupts could be assigned
 * to it. If successful, add available vectors for that CPU to the IRM
 * pool if apix_irm_cpu_factor is already satisfied.
 *
 * For a CPU flagged APIC_CPU_SUSPEND the request is passed straight to
 * the saved PSM routine and no IRM accounting is touched.
 */
void
apix_irm_enable_intr(processorid_t id)
{
	uint_t new_pool_size;

	/* Interrupt enabling for Suspend/Resume */
	if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND) {
		(*psm_enable_intr_saved)(id);
		return;
	}

	mutex_enter(&apix_irm_lock);

	/* enable the CPU for interrupts */
	(*psm_enable_intr_saved)(id);

	/* increment the number of CPUs enabled for interrupts */
	apix_irm_max_cpus++;

	ASSERT(apix_irminfo.apix_per_cpu_vectors > 0);

	/*
	 * Check if the apix_irm_cpu_factor is satisfied before.
	 * If satisfied, add the CPU to IRM pool.
	 */
	if ((apix_irm_cpu_factor > 0) &&
	    (apix_irm_cpu_factor_available < apix_irm_cpu_factor)) {
		/*
		 * Don't add the CPU to the IRM pool. Just update
		 * the available CPU factor.
		 */
		apix_irm_cpu_factor_available++;
		mutex_exit(&apix_irm_lock);
		return;
	}

	/*
	 * Add the CPU to the IRM pool.
	 */

	/* increment the CPU count used by IRM */
	apix_irm_cpus_used++;

	/* Calculate the new pool size */
	new_pool_size = apix_irm_params.iparams_total +
	    apix_irminfo.apix_per_cpu_vectors;

	/* Apply the max. limit */
	if (apix_system_max_vectors > 0) {
		uint_t max;

		max = apix_system_max_vectors -
		    apix_irm_fixed_intr_vectors_used -
		    apix_irm_cache_size;

		new_pool_size = MIN(new_pool_size, max);
	}
	if (new_pool_size == apix_irm_params.iparams_total) {
		/* no change to pool size */
		mutex_exit(&apix_irm_lock);
		return;
	}
	if (new_pool_size < apix_irm_params.iparams_total) {
		/* both values are unsigned, hence %u */
		cmn_err(CE_WARN, "new_pool_size %u is inconsistent "
		    "with irm_params.iparams_total %u",
		    new_pool_size, apix_irm_params.iparams_total);
		mutex_exit(&apix_irm_lock);
		return;
	}

	/*
	 * Record the new size only if the pool really was resized, so
	 * that iparams_total never drifts away from the actual pool.
	 */
	if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) ==
	    NDI_SUCCESS) {
		/* update the pool size info */
		apix_irm_params.iparams_total = new_pool_size;
	} else {
		cmn_err(CE_WARN, "apix_irm_enable_intr: failed to resize"
		    " the IRM pool");
	}

	mutex_exit(&apix_irm_lock);
}