aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/pci
diff options
context:
space:
mode:
authorJohn Baldwin <jhb@FreeBSD.org>2007-05-02 17:50:36 +0000
committerJohn Baldwin <jhb@FreeBSD.org>2007-05-02 17:50:36 +0000
commite706f7f0c712b53fb752cc7e7d8490b11123ba07 (patch)
tree69dac9819e24c8ebcc3298470c47cfe7cc574930 /sys/dev/pci
parent9698b3b564a476ffd4148aa8dbd6c5bbe65e493b (diff)
downloadsrc-e706f7f0c712b53fb752cc7e7d8490b11123ba07.tar.gz
src-e706f7f0c712b53fb752cc7e7d8490b11123ba07.zip
Revamp the MSI/MSI-X code a bit to achieve two main goals:
- Simplify the amount of work that has to be done for each architecture by pushing more of the truly MI code down into the PCI bus driver. - Don't bind MSI-X indices to IRQs so that we can allow a driver to map multiple MSI-X messages into a single IRQ when handling a message shortage. The changes include: - Add a new pcib_if method: PCIB_MAP_MSI() which is called by the PCI bus to calculate the address and data values for a given MSI/MSI-X IRQ. The x86 nexus drivers map this into a call to a new 'msi_map()' function in msi.c that does the mapping. - Retire the pcib_if method PCIB_REMAP_MSIX() and remove the 'index' parameter from PCIB_ALLOC_MSIX(). MD code no longer has any knowledge of the MSI-X index for a given MSI-X IRQ. - The PCI bus driver now stores more MSI-X state in a child's ivars. Specifically, it now stores an array of IRQs (called "message vectors" in the code) that have associated address and data values, and a small virtual version of the MSI-X table that specifies the message vector that a given MSI-X table entry uses. Sparse mappings are permitted in the virtual table. - The PCI bus driver now configures the MSI and MSI-X address/data registers directly via custom bus_setup_intr() and bus_teardown_intr() methods. pci_setup_intr() invokes PCIB_MAP_MSI() to determine the address and data values for a given message as needed. The MD code no longer has to call back down into the PCI bus code to set these values from the nexus' bus_setup_intr() handler. - The PCI bus code provides a callout (pci_remap_msi_irq()) that the MD code can call to force the PCI bus to re-invoke PCIB_MAP_MSI() to get new values of the address and data fields for a given IRQ. The x86 MSI code uses this when an MSI IRQ is moved to a different CPU, requiring a new value of the 'address' field. 
- The x86 MSI pseudo-driver loses a lot of code, and in fact the separate MSI/MSI-X pseudo-PICs are collapsed down into a single MSI PIC driver since the only remaining diff between the two is a substring in a bootverbose printf. - The PCI bus driver will now restore MSI-X state (including programming entries in the MSI-X table) on device resume. - The interface for pci_remap_msix() has changed. Instead of accepting indices for the allocated vectors, it accepts a mini-virtual table (with a new length parameter). This table is an array of u_ints, where each value specifies which allocated message vector to use for the corresponding MSI-X message. A vector of 0 forces a message to not have an associated IRQ. The device may choose to only use some of the IRQs assigned, in which case the unused IRQs must be at the "end" and will be released back to the system. This allows a driver to use the same remap table for different shortage values. For example, if a driver wants 4 messages, it can use the same remap table (which only uses the first two messages) for the cases when it only gets 2 or 3 messages and in the latter case the PCI bus will release the 3rd IRQ back to the system. MFC after: 1 month
Notes
Notes: svn path=/head/; revision=169221
Diffstat (limited to 'sys/dev/pci')
-rw-r--r--sys/dev/pci/pci.c527
-rw-r--r--sys/dev/pci/pci_if.m3
-rw-r--r--sys/dev/pci/pci_pci.c23
-rw-r--r--sys/dev/pci/pci_private.h7
-rw-r--r--sys/dev/pci/pcib_if.m17
-rw-r--r--sys/dev/pci/pcib_private.h4
-rw-r--r--sys/dev/pci/pcivar.h37
7 files changed, 495 insertions, 123 deletions
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 52c1ff76a1ac..52935e7019e5 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -101,7 +101,16 @@ static void pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
int reg, uint32_t data);
#endif
static void pci_read_vpd(device_t pcib, pcicfgregs *cfg);
+static void pci_disable_msi(device_t dev);
+static void pci_enable_msi(device_t dev, uint64_t address,
+ uint16_t data);
+static void pci_enable_msix(device_t dev, u_int index,
+ uint64_t address, uint32_t data);
+static void pci_mask_msix(device_t dev, u_int index);
+static void pci_unmask_msix(device_t dev, u_int index);
static int pci_msi_blacklisted(void);
+static void pci_resume_msi(device_t dev);
+static void pci_resume_msix(device_t dev);
static device_method_t pci_methods[] = {
/* Device interface */
@@ -118,8 +127,8 @@ static device_method_t pci_methods[] = {
DEVMETHOD(bus_read_ivar, pci_read_ivar),
DEVMETHOD(bus_write_ivar, pci_write_ivar),
DEVMETHOD(bus_driver_added, pci_driver_added),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+ DEVMETHOD(bus_setup_intr, pci_setup_intr),
+ DEVMETHOD(bus_teardown_intr, pci_teardown_intr),
DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource),
@@ -1016,7 +1025,7 @@ pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
struct pcicfg_msix *msix = &dinfo->cfg.msix;
uint32_t offset;
- KASSERT(msix->msix_alloc > index, ("bogus index"));
+ KASSERT(msix->msix_table_len > index, ("bogus index"));
offset = msix->msix_table_offset + index * 16;
bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
@@ -1046,7 +1055,7 @@ pci_unmask_msix(device_t dev, u_int index)
struct pcicfg_msix *msix = &dinfo->cfg.msix;
uint32_t offset, val;
- KASSERT(msix->msix_alloc > index, ("bogus index"));
+ KASSERT(msix->msix_table_len > index, ("bogus index"));
offset = msix->msix_table_offset + index * 16 + 12;
val = bus_read_4(msix->msix_table_res, offset);
if (val & PCIM_MSIX_VCTRL_MASK) {
@@ -1062,13 +1071,46 @@ pci_pending_msix(device_t dev, u_int index)
struct pcicfg_msix *msix = &dinfo->cfg.msix;
uint32_t offset, bit;
- KASSERT(msix->msix_alloc > index, ("bogus index"));
+ KASSERT(msix->msix_table_len > index, ("bogus index"));
offset = msix->msix_pba_offset + (index / 32) * 4;
bit = 1 << index % 32;
return (bus_read_4(msix->msix_pba_res, offset) & bit);
}
/*
+ * Restore MSI-X registers and table during resume. If MSI-X is
+ * enabled then walk the virtual table to restore the actual MSI-X
+ * table.
+ */
+static void
+pci_resume_msix(device_t dev)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ struct pcicfg_msix *msix = &dinfo->cfg.msix;
+ struct msix_table_entry *mte;
+ struct msix_vector *mv;
+ int i;
+
+ /* The hardware table is only reprogrammed if messages are allocated. */
+ if (msix->msix_alloc > 0) {
+ /* First, mask all vectors. */
+ for (i = 0; i < msix->msix_msgnum; i++)
+ pci_mask_msix(dev, i);
+
+ /* Second, program any messages with at least one handler. */
+ for (i = 0; i < msix->msix_table_len; i++) {
+ mte = &msix->msix_table[i];
+ /* Skip slots with no vector assigned or no handler attached. */
+ if (mte->mte_vector == 0 || mte->mte_handlers == 0)
+ continue;
+ /* mte_vector is 1-based; msix_vectors[] is 0-based. */
+ mv = &msix->msix_vectors[mte->mte_vector - 1];
+ pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
+ pci_unmask_msix(dev, i);
+ }
+ }
+ /* Always write back the saved control register value. */
+ pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
+ msix->msix_ctrl, 2);
+}
+
+/*
* Attempt to allocate *count MSI-X messages. The actual number allocated is
* returned in *count. After this function returns, each message will be
* available to the driver as SYS_RES_IRQ resources starting at rid 1.
@@ -1125,8 +1167,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
max = min(*count, cfg->msix.msix_msgnum);
for (i = 0; i < max; i++) {
/* Allocate a message. */
- error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
- &irq);
+ error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
if (error)
break;
resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
@@ -1183,6 +1224,17 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
for (i = 0; i < cfg->msix.msix_msgnum; i++)
pci_mask_msix(child, i);
+ /* Allocate and initialize vector data and virtual table. */
+ cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ for (i = 0; i < actual; i++) {
+ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
+ cfg->msix.msix_vectors[i].mv_irq = rle->start;
+ cfg->msix.msix_table[i].mte_vector = i + 1;
+ }
+
/* Update control register to enable MSI-X. */
cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
@@ -1190,93 +1242,164 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
/* Update counts of alloc'd messages. */
cfg->msix.msix_alloc = actual;
+ cfg->msix.msix_table_len = actual;
*count = actual;
return (0);
}
/*
- * By default, pci_alloc_msix() will assign the allocated IRQ resources to
- * the first N messages in the MSI-X table. However, device drivers may
- * want to use different layouts in the case that they do not allocate a
- * full table. This method allows the driver to specify what layout it
- * wants. It must be called after a successful pci_alloc_msix() but
- * before any of the associated SYS_RES_IRQ resources are allocated via
- * bus_alloc_resource(). The 'indices' array contains N (where N equals
- * the 'count' returned from pci_alloc_msix()) message indices. The
- * indices are 1-based (meaning the first message is at index 1). On
- * successful return, each of the messages in the 'indices' array will
- * have an associated SYS_RES_IRQ whose rid is equal to the index. Thus,
- * if indices contains { 2, 4 }, then upon successful return, the 'child'
- * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
+ * By default, pci_alloc_msix() will assign the allocated IRQ
+ * resources consecutively to the first N messages in the MSI-X table.
+ * However, device drivers may want to use different layouts if they
+ * either receive fewer messages than they asked for, or they wish to
+ * populate the MSI-X table sparsely. This method allows the driver
+ * to specify what layout it wants. It must be called after a
+ * successful pci_alloc_msix() but before any of the associated
+ * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
+ *
+ * The 'vectors' array contains 'count' message vectors. The array
+ * maps directly to the MSI-X table in that index 0 in the array
+ * specifies the vector for the first message in the MSI-X table, etc.
+ * The vector value in each array index can either be 0 to indicate
+ * that no vector should be assigned to a message slot, or it can be a
+ * number from 1 to N (where N is the count returned from a
+ * successful call to pci_alloc_msix()) to indicate which message
+ * vector (IRQ) to be used for the corresponding message.
+ *
+ * On successful return, each message with a non-zero vector will have
+ * an associated SYS_RES_IRQ whose rid is equal to the array index +
+ * 1. Additionally, if any of the IRQs allocated via the previous
+ * call to pci_alloc_msix() are not used in the mapping, those IRQs
+ * will be freed back to the system automatically.
+ *
+ * For example, suppose a driver has a MSI-X table with 6 messages and
+ * asks for 6 messages, but pci_alloc_msix() only returns a count of
+ * 3. Call the three vectors allocated by pci_alloc_msix() A, B, and
+ * C. After the call to pci_alloc_msix(), the device will be setup to
+ * have an MSI-X table of ABC--- (where - means no vector assigned).
+ * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
+ * then the MSI-X table will look like A-AB-B, and the 'C' vector will
+ * be freed back to the system. This device will also have valid
+ * SYS_RES_IRQ rids of 1, 3, 4, and 6.
+ *
+ * In any case, the SYS_RES_IRQ rid X will always map to the message
+ * at MSI-X table index X - 1 and will only be valid if a vector is
+ * assigned to that table entry.
*/
int
-pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
+pci_remap_msix_method(device_t dev, device_t child, int count,
+ const u_int *vectors)
{
struct pci_devinfo *dinfo = device_get_ivars(child);
- pcicfgregs *cfg = &dinfo->cfg;
+ struct pcicfg_msix *msix = &dinfo->cfg.msix;
struct resource_list_entry *rle;
- int count, error, i, j, *irqs;
+ int i, irq, j, *used;
- /* Sanity check the indices. */
- for (i = 0; i < cfg->msix.msix_alloc; i++)
- if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
- return (EINVAL);
+ /*
+ * Have to have at least one message in the table but the
+ * table can't be bigger than the actual MSI-X table in the
+ * device.
+ */
+ if (count == 0 || count > msix->msix_msgnum)
+ return (EINVAL);
- /* Check for duplicates. */
- for (i = 0; i < cfg->msix.msix_alloc; i++)
- for (j = i + 1; j < cfg->msix.msix_alloc; j++)
- if (indices[i] == indices[j])
- return (EINVAL);
+ /* Sanity check the vectors. */
+ for (i = 0; i < count; i++)
+ if (vectors[i] > msix->msix_alloc)
+ return (EINVAL);
+ /*
+ * Make sure there aren't any holes in the vectors to be used.
+ * It's a big pain to support it, and it doesn't really make
+ * sense anyway. Also, at least one vector must be used.
+ */
+ used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
+ M_ZERO);
+ for (i = 0; i < count; i++)
+ if (vectors[i] != 0)
+ used[vectors[i] - 1] = 1;
+ for (i = 0; i < msix->msix_alloc - 1; i++)
+ if (used[i] == 0 && used[i + 1] == 1) {
+ free(used, M_DEVBUF);
+ return (EINVAL);
+ }
+ if (used[0] != 1) {
+ free(used, M_DEVBUF);
+ return (EINVAL);
+ }
+
/* Make sure none of the resources are allocated. */
- for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
- rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
- if (rle == NULL)
+ for (i = 0; i < msix->msix_table_len; i++) {
+ if (msix->msix_table[i].mte_vector == 0)
continue;
+ if (msix->msix_table[i].mte_handlers > 0) {
+ /* Don't leak the scratch array on the error path. */
+ free(used, M_DEVBUF);
+ return (EBUSY);
+ }
+ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
+ KASSERT(rle != NULL, ("missing resource"));
- if (rle->res != NULL)
+ if (rle->res != NULL) {
+ /* Don't leak the scratch array on the error path. */
+ free(used, M_DEVBUF);
return (EBUSY);
+ }
- count++;
}
- /* Save the IRQ values and free the existing resources. */
- irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
- for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
- rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
- if (rle == NULL)
+ /* Free the existing resource list entries. */
+ for (i = 0; i < msix->msix_table_len; i++) {
+ if (msix->msix_table[i].mte_vector == 0)
continue;
- irqs[count] = rle->start;
- resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
- count++;
+ resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
}
- /* Map the IRQ values to the new message indices and rids. */
- for (i = 0; i < cfg->msix.msix_alloc; i++) {
- resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
- irqs[i], irqs[i], 1);
-
- /*
- * The indices in the backend code (PCIB_* methods and the
- * MI helper routines for MD code such as pci_enable_msix())
- * are all zero-based. However, the indices passed to this
- * function are 1-based so that the correspond 1:1 with the
- * SYS_RES_IRQ resource IDs.
- */
- error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
- indices[i] - 1, irqs[i]);
- KASSERT(error == 0, ("Failed to remap MSI-X message"));
+ /*
+ * Build the new virtual table keeping track of which vectors are
+ * used.
+ */
+ free(msix->msix_table, M_DEVBUF);
+ msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ for (i = 0; i < count; i++)
+ msix->msix_table[i].mte_vector = vectors[i];
+ msix->msix_table_len = count;
+
+ /* Free any unused IRQs and resize the vectors array if necessary. */
+ j = msix->msix_alloc - 1;
+ if (used[j] == 0) {
+ struct msix_vector *vec;
+
+ /* used[0] is known to be 1 here, so this loop terminates. */
+ while (used[j] == 0) {
+ PCIB_RELEASE_MSIX(device_get_parent(dev), child,
+ msix->msix_vectors[j].mv_irq);
+ j--;
+ }
+ vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
+ M_WAITOK);
+ bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
+ (j + 1));
+ free(msix->msix_vectors, M_DEVBUF);
+ msix->msix_vectors = vec;
+ msix->msix_alloc = j + 1;
+ }
+ free(used, M_DEVBUF);
+
+ /*
+ * Map the IRQs onto the rids. Vector numbers are 1-based while
+ * msix_vectors[] is 0-based, so vector N lives at index N - 1.
+ */
+ for (i = 0; i < count; i++) {
+ if (vectors[i] == 0)
+ continue;
+ irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
+ resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
+ irq, 1);
}
+
if (bootverbose) {
- if (cfg->msix.msix_alloc == 1)
- device_printf(child,
- "Remapped MSI-X IRQ to index %d\n", indices[0]);
- else {
- device_printf(child, "Remapped MSI-X IRQs to indices");
- for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
- printf(" %d,", indices[i]);
- printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
+ device_printf(child, "Remapped MSI-X IRQs as: ");
+ for (i = 0; i < count; i++) {
+ if (i != 0)
+ printf(", ");
+ if (vectors[i] == 0)
+ printf("---");
+ else
+ printf("%d",
+ msix->msix_vectors[vectors[i] - 1].mv_irq);
}
+ printf("\n");
}
- free(irqs, M_TEMP);
return (0);
}
@@ -1287,20 +1410,22 @@ pci_release_msix(device_t dev, device_t child)
struct pci_devinfo *dinfo = device_get_ivars(child);
struct pcicfg_msix *msix = &dinfo->cfg.msix;
struct resource_list_entry *rle;
- int count, i;
+ int i;
/* Do we have any messages to release? */
if (msix->msix_alloc == 0)
return (ENODEV);
/* Make sure none of the resources are allocated. */
- for (i = 1, count = 0; count < msix->msix_alloc; i++) {
- rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
- if (rle == NULL)
+ for (i = 0; i < msix->msix_table_len; i++) {
+ if (msix->msix_table[i].mte_vector == 0)
continue;
+ if (msix->msix_table[i].mte_handlers > 0)
+ return (EBUSY);
+ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
+ KASSERT(rle != NULL, ("missing resource"));
if (rle->res != NULL)
return (EBUSY);
- count++;
}
/* Update control register to disable MSI-X. */
@@ -1308,18 +1433,20 @@ pci_release_msix(device_t dev, device_t child)
pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
msix->msix_ctrl, 2);
- /* Release the messages. */
- for (i = 1, count = 0; count < msix->msix_alloc; i++) {
- rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
- if (rle == NULL)
+ /* Free the resource list entries. */
+ for (i = 0; i < msix->msix_table_len; i++) {
+ if (msix->msix_table[i].mte_vector == 0)
continue;
- PCIB_RELEASE_MSIX(device_get_parent(dev), child,
- rle->start);
- resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
- count++;
+ resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
}
+ free(msix->msix_table, M_DEVBUF);
+ msix->msix_table_len = 0;
- /* Update alloc count. */
+ /* Release the IRQs. */
+ for (i = 0; i < msix->msix_alloc; i++)
+ PCIB_RELEASE_MSIX(device_get_parent(dev), child,
+ msix->msix_vectors[i].mv_irq);
+ free(msix->msix_vectors, M_DEVBUF);
msix->msix_alloc = 0;
return (0);
}
@@ -1351,8 +1478,6 @@ pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
struct pcicfg_msi *msi = &dinfo->cfg.msi;
/* Write data and address values. */
- msi->msi_addr = address;
- msi->msi_data = data;
pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
address & 0xffffffff, 4);
if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
@@ -1370,6 +1495,18 @@ pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
2);
}
+/*
+ * Turn off MSI for a device: clear the enable bit in the cached copy of
+ * the MSI control register and write the result back to config space.
+ */
+void
+pci_disable_msi(device_t dev)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ struct pcicfg_msi *msi = &dinfo->cfg.msi;
+
+ /* Disable MSI in the control register. */
+ msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
+ pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
+ 2);
+}
+
/*
* Restore MSI registers during resume. If MSI is enabled then
* restore the data and address registers in addition to the control
@@ -1401,6 +1538,82 @@ pci_resume_msi(device_t dev)
2);
}
+/*
+ * Ask the parent bridge for fresh MSI/MSI-X address and data values for
+ * 'irq' on device 'dev' and reprogram the device with them.
+ *
+ * Returns 0 on success (or if the device uses MSI but has no active
+ * handlers), ENOENT if 'irq' is not one of the device's MSI or MSI-X
+ * IRQs, or the error returned by PCIB_MAP_MSI().
+ */
+int
+pci_remap_msi_irq(device_t dev, u_int irq)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ pcicfgregs *cfg = &dinfo->cfg;
+ struct resource_list_entry *rle;
+ struct msix_table_entry *mte;
+ struct msix_vector *mv;
+ device_t bus;
+ uint64_t addr;
+ uint32_t data;
+ int error, found, i, j;
+
+ bus = device_get_parent(dev);
+
+ /*
+ * Handle MSI first. We try to find this IRQ among our list
+ * of MSI IRQs. If we find it, we request updated address and
+ * data registers and apply the results.
+ */
+ if (cfg->msi.msi_alloc > 0) {
+
+ /* If we don't have any active handlers, nothing to do. */
+ if (cfg->msi.msi_handlers == 0)
+ return (0);
+ for (i = 0; i < cfg->msi.msi_alloc; i++) {
+ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
+ i + 1);
+ if (rle->start == irq) {
+ error = PCIB_MAP_MSI(device_get_parent(bus),
+ dev, irq, &addr, &data);
+ if (error)
+ return (error);
+ /* Disable MSI while the registers are rewritten. */
+ pci_disable_msi(dev);
+ dinfo->cfg.msi.msi_addr = addr;
+ dinfo->cfg.msi.msi_data = data;
+ pci_enable_msi(dev, addr, data);
+ return (0);
+ }
+ }
+ return (ENOENT);
+ }
+
+ /*
+ * For MSI-X, we check to see if we have this IRQ. If we do,
+ * we request the updated mapping info. If that works, we go
+ * through all the slots that use this IRQ and update them.
+ */
+ if (cfg->msix.msix_alloc > 0) {
+ found = 0;
+ for (i = 0; i < cfg->msix.msix_alloc; i++) {
+ mv = &cfg->msix.msix_vectors[i];
+ if (mv->mv_irq == irq) {
+ found = 1;
+ error = PCIB_MAP_MSI(device_get_parent(bus),
+ dev, irq, &addr, &data);
+ if (error)
+ return (error);
+ mv->mv_address = addr;
+ mv->mv_data = data;
+ for (j = 0; j < cfg->msix.msix_table_len; j++) {
+ mte = &cfg->msix.msix_table[j];
+ /* Table entries store 1-based vector numbers. */
+ if (mte->mte_vector != i + 1)
+ continue;
+ if (mte->mte_handlers == 0)
+ continue;
+ pci_mask_msix(dev, j);
+ pci_enable_msix(dev, j, addr, data);
+ pci_unmask_msix(dev, j);
+ }
+ }
+ }
+ /* Report success if the IRQ matched one of our vectors. */
+ return (found ? 0 : ENOENT);
+ }
+
+ return (ENOENT);
+}
+
/*
* Returns true if the specified device is blacklisted because MSI
* doesn't work.
@@ -1565,6 +1778,7 @@ pci_alloc_msi_method(device_t dev, device_t child, int *count)
/* Update counts of alloc'd messages. */
cfg->msi.msi_alloc = actual;
+ cfg->msi.msi_handlers = 0;
*count = actual;
return (0);
}
@@ -1589,6 +1803,8 @@ pci_release_msi_method(device_t dev, device_t child)
KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
/* Make sure none of the resources are allocated. */
+ if (msi->msi_handlers > 0)
+ return (EBUSY);
for (i = 0; i < msi->msi_alloc; i++) {
rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
KASSERT(rle != NULL, ("missing MSI resource"));
@@ -1597,8 +1813,10 @@ pci_release_msi_method(device_t dev, device_t child)
irqs[i] = rle->start;
}
- /* Update control register with 0 count and disable MSI. */
- msi->msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
+ /* Update control register with 0 count. */
+ KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
+ ("%s: MSI still enabled", __func__));
+ msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
msi->msi_ctrl, 2);
@@ -1609,6 +1827,8 @@ pci_release_msi_method(device_t dev, device_t child)
/* Update alloc count. */
msi->msi_alloc = 0;
+ msi->msi_addr = 0;
+ msi->msi_data = 0;
return (0);
}
@@ -2449,6 +2669,134 @@ pci_driver_added(device_t dev, driver_t *driver)
}
+/*
+ * bus_setup_intr method for the PCI bus. Installs the handler through
+ * bus_generic_setup_intr() and then, for a direct child's IRQ with a
+ * non-zero rid, programs the device's MSI or MSI-X registers using the
+ * address/data pair obtained from PCIB_MAP_MSI(). Per-device (MSI) or
+ * per-table-entry (MSI-X) handler counts are incremented on success.
+ * On a mapping failure the generic handler is torn down again and the
+ * error is returned; otherwise the cookie is stored in *cookiep.
+ */
int
+pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
+ driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
+{
+ struct pci_devinfo *dinfo;
+ struct msix_table_entry *mte;
+ struct msix_vector *mv;
+ uint64_t addr;
+ uint32_t data;
+ void *cookie;
+ int error, rid;
+
+ error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
+ arg, &cookie);
+ if (error)
+ return (error);
+
+ /*
+ * If this is a direct child, check to see if the interrupt is
+ * MSI or MSI-X. If so, ask our parent to map the MSI and give
+ * us the address and data register values. If we fail for some
+ * reason, teardown the interrupt handler.
+ */
+ rid = rman_get_rid(irq);
+ if (device_get_parent(child) == dev && rid > 0) {
+ dinfo = device_get_ivars(child);
+ if (dinfo->cfg.msi.msi_alloc > 0) {
+ /* An address of 0 means the MSI has not been mapped yet. */
+ if (dinfo->cfg.msi.msi_addr == 0) {
+ KASSERT(dinfo->cfg.msi.msi_handlers == 0,
+ ("MSI has handlers, but vectors not mapped"));
+ error = PCIB_MAP_MSI(device_get_parent(dev),
+ child, rman_get_start(irq), &addr, &data);
+ if (error)
+ goto bad;
+ dinfo->cfg.msi.msi_addr = addr;
+ dinfo->cfg.msi.msi_data = data;
+ pci_enable_msi(child, addr, data);
+ }
+ dinfo->cfg.msi.msi_handlers++;
+ } else {
+ KASSERT(dinfo->cfg.msix.msix_alloc > 0,
+ ("No MSI or MSI-X interrupts allocated"));
+ KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
+ ("MSI-X index too high"));
+ /* rid X corresponds to virtual table entry X - 1. */
+ mte = &dinfo->cfg.msix.msix_table[rid - 1];
+ KASSERT(mte->mte_vector != 0, ("no message vector"));
+ /* mte_vector is 1-based; msix_vectors[] is 0-based. */
+ mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
+ KASSERT(mv->mv_irq == rman_get_start(irq),
+ ("IRQ mismatch"));
+ /* Map the vector the first time any table entry uses it. */
+ if (mv->mv_address == 0) {
+ KASSERT(mte->mte_handlers == 0,
+ ("MSI-X table entry has handlers, but vector not mapped"));
+ error = PCIB_MAP_MSI(device_get_parent(dev),
+ child, rman_get_start(irq), &addr, &data);
+ if (error)
+ goto bad;
+ mv->mv_address = addr;
+ mv->mv_data = data;
+ }
+ /* Program and unmask the entry only for its first handler. */
+ if (mte->mte_handlers == 0) {
+ pci_enable_msix(child, rid - 1, mv->mv_address,
+ mv->mv_data);
+ pci_unmask_msix(child, rid - 1);
+ }
+ mte->mte_handlers++;
+ }
+ bad:
+ /* Reached on both success and failure; undo only if error is set. */
+ if (error) {
+ (void)bus_generic_teardown_intr(dev, child, irq,
+ cookie);
+ return (error);
+ }
+ }
+ *cookiep = cookie;
+ return (0);
+}
+
+/*
+ * bus_teardown_intr method for the PCI bus. For a direct child's IRQ
+ * with a non-zero rid, drops the MSI or MSI-X handler count and, when
+ * it reaches zero, disables MSI or masks the MSI-X table entry. The
+ * handler itself is then removed via bus_generic_teardown_intr().
+ *
+ * Returns EINVAL if 'irq' is NULL or inactive, does not match the
+ * child's resource list entry, or has no handler recorded; otherwise
+ * returns the result of bus_generic_teardown_intr().
+ */
+int
+pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
+ void *cookie)
+{
+ struct msix_table_entry *mte;
+ struct resource_list_entry *rle;
+ struct pci_devinfo *dinfo;
+ int error, rid;
+
+ /*
+ * If this is a direct child, check to see if the interrupt is
+ * MSI or MSI-X. If so, decrement the appropriate handlers
+ * count and mask the MSI-X message, or disable MSI messages
+ * if the count drops to 0.
+ */
+ if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
+ return (EINVAL);
+ rid = rman_get_rid(irq);
+ if (device_get_parent(child) == dev && rid > 0) {
+ dinfo = device_get_ivars(child);
+ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
+ /* A missing list entry is rejected instead of dereferenced. */
+ if (rle == NULL || rle->res != irq)
+ return (EINVAL);
+ if (dinfo->cfg.msi.msi_alloc > 0) {
+ KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
+ ("MSI index too high"));
+ if (dinfo->cfg.msi.msi_handlers == 0)
+ return (EINVAL);
+ dinfo->cfg.msi.msi_handlers--;
+ if (dinfo->cfg.msi.msi_handlers == 0)
+ pci_disable_msi(child);
+ } else {
+ KASSERT(dinfo->cfg.msix.msix_alloc > 0,
+ ("No MSI or MSI-X interrupts allocated"));
+ KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
+ ("MSI-X index too high"));
+ /* rid X corresponds to virtual table entry X - 1. */
+ mte = &dinfo->cfg.msix.msix_table[rid - 1];
+ if (mte->mte_handlers == 0)
+ return (EINVAL);
+ mte->mte_handlers--;
+ if (mte->mte_handlers == 0)
+ pci_mask_msix(child, rid - 1);
+ }
+ }
+ /* The message is already quiesced; now remove the handler itself. */
+ error = bus_generic_teardown_intr(dev, child, irq, cookie);
+ if (device_get_parent(child) == dev && rid > 0)
+ KASSERT(error == 0,
+ ("%s: generic teardown failed for MSI/MSI-X", __func__));
+ return (error);
+}
+
+int
pci_print_child(device_t dev, device_t child)
{
struct pci_devinfo *dinfo;
@@ -3222,12 +3570,11 @@ pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
- /*
- * Restore MSI configuration if it is present. If MSI is enabled,
- * then restore the data and addr registers.
- */
+ /* Restore MSI and MSI-X configurations if they are present. */
if (dinfo->cfg.msi.msi_location != 0)
pci_resume_msi(dev);
+ if (dinfo->cfg.msix.msix_location != 0)
+ pci_resume_msix(dev);
}
void
diff --git a/sys/dev/pci/pci_if.m b/sys/dev/pci/pci_if.m
index 266ec34d10f9..05dfa382d96b 100644
--- a/sys/dev/pci/pci_if.m
+++ b/sys/dev/pci/pci_if.m
@@ -127,7 +127,8 @@ METHOD int alloc_msix {
METHOD int remap_msix {
device_t dev;
device_t child;
- u_int *indices;
+ int count;
+ const u_int *vectors;
};
METHOD int release_msi {
diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c
index d0ac287f9ae1..1b9a3eb8e12b 100644
--- a/sys/dev/pci/pci_pci.c
+++ b/sys/dev/pci/pci_pci.c
@@ -82,8 +82,8 @@ static device_method_t pcib_methods[] = {
DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_remap_msix, pcib_remap_msix),
DEVMETHOD(pcib_release_msix, pcib_release_msix),
+ DEVMETHOD(pcib_map_msi, pcib_map_msi),
{ 0, 0 }
};
@@ -547,7 +547,7 @@ pcib_route_interrupt(device_t pcib, device_t dev, int pin)
return(intnum);
}
-/* Pass request to alloc MSI messages up to the parent bridge. */
+/* Pass request to alloc MSI/MSI-X messages up to the parent bridge. */
int
pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
{
@@ -561,7 +561,7 @@ pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
irqs));
}
-/* Pass request to release MSI messages up to the parent bridge. */
+/* Pass request to release MSI/MSI-X messages up to the parent bridge. */
int
pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{
@@ -573,7 +573,7 @@ pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
/* Pass request to alloc an MSI-X message up to the parent bridge. */
int
-pcib_alloc_msix(device_t pcib, device_t dev, int index, int *irq)
+pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
{
struct pcib_softc *sc = device_get_softc(dev);
device_t bus;
@@ -581,27 +581,28 @@ pcib_alloc_msix(device_t pcib, device_t dev, int index, int *irq)
if (sc->flags & PCIB_DISABLE_MSI)
return (ENXIO);
bus = device_get_parent(pcib);
- return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, index, irq));
+ return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
}
-/* Pass request to remap an MSI-X message up to the parent bridge. */
+/* Pass request to release an MSI-X message up to the parent bridge. */
int
-pcib_remap_msix(device_t pcib, device_t dev, int index, int irq)
+pcib_release_msix(device_t pcib, device_t dev, int irq)
{
device_t bus;
bus = device_get_parent(pcib);
- return (PCIB_REMAP_MSIX(device_get_parent(bus), dev, index, irq));
+ return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
}
-/* Pass request to release an MSI-X message up to the parent bridge. */
+/* Pass request to map MSI/MSI-X message up to parent bridge. */
int
-pcib_release_msix(device_t pcib, device_t dev, int irq)
+pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
+ uint32_t *data)
{
device_t bus;
bus = device_get_parent(pcib);
- return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
+ return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
}
/*
diff --git a/sys/dev/pci/pci_private.h b/sys/dev/pci/pci_private.h
index 32be4aa59fa8..ce3ad384b307 100644
--- a/sys/dev/pci/pci_private.h
+++ b/sys/dev/pci/pci_private.h
@@ -49,6 +49,11 @@ int pci_read_ivar(device_t dev, device_t child, int which,
uintptr_t *result);
int pci_write_ivar(device_t dev, device_t child, int which,
uintptr_t value);
+int pci_setup_intr(device_t dev, device_t child,
+ struct resource *irq, int flags, driver_filter_t *filter,
+ driver_intr_t *intr, void *arg, void **cookiep);
+int pci_teardown_intr(device_t dev, device_t child,
+ struct resource *irq, void *cookie);
int pci_get_vpd_ident_method(device_t dev, device_t child,
const char **identptr);
int pci_get_vpd_readonly_method(device_t dev, device_t child,
@@ -69,7 +74,7 @@ int pci_find_extcap_method(device_t dev, device_t child,
int pci_alloc_msi_method(device_t dev, device_t child, int *count);
int pci_alloc_msix_method(device_t dev, device_t child, int *count);
int pci_remap_msix_method(device_t dev, device_t child,
- u_int *indices);
+ int count, const u_int *vectors);
int pci_release_msi_method(device_t dev, device_t child);
int pci_msi_count_method(device_t dev, device_t child);
int pci_msix_count_method(device_t dev, device_t child);
diff --git a/sys/dev/pci/pcib_if.m b/sys/dev/pci/pcib_if.m
index 18755faf6539..0b7e8bc62c9b 100644
--- a/sys/dev/pci/pcib_if.m
+++ b/sys/dev/pci/pcib_if.m
@@ -105,8 +105,8 @@ METHOD int alloc_msi {
};
#
-# Release 'count' MSI message mapped onto 'count' IRQs stored in the
-# array pointed to by 'irq'.
+# Release 'count' MSI messages mapped onto 'count' IRQs stored in the
+# array pointed to by 'irqs'.
#
METHOD int release_msi {
device_t pcib;
@@ -121,25 +121,26 @@ METHOD int release_msi {
METHOD int alloc_msix {
device_t pcib;
device_t dev;
- int index;
int *irq;
};
#
-# Remap a single MSI-X message to a different index.
+# Release a single MSI-X message mapped onto 'irq'.
#
-METHOD int remap_msix {
+METHOD int release_msix {
device_t pcib;
device_t dev;
- int index;
int irq;
};
#
-# Release a single MSI-X message mapped onto 'irq'.
+# Determine the MSI/MSI-X message address and data for 'irq'. The address
+# is returned in '*addr', and the data in '*data'.
#
-METHOD int release_msix {
+METHOD int map_msi {
device_t pcib;
device_t dev;
int irq;
+ uint64_t *addr;
+ uint32_t *data;
};
diff --git a/sys/dev/pci/pcib_private.h b/sys/dev/pci/pcib_private.h
index a571578e1155..aea476d4da1b 100644
--- a/sys/dev/pci/pcib_private.h
+++ b/sys/dev/pci/pcib_private.h
@@ -77,8 +77,8 @@ void pcib_write_config(device_t dev, int b, int s, int f, int reg, uint32_t val
int pcib_route_interrupt(device_t pcib, device_t dev, int pin);
int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs);
int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs);
-int pcib_alloc_msix(device_t pcib, device_t dev, int index, int *irq);
-int pcib_remap_msix(device_t pcib, device_t dev, int index, int irq);
+int pcib_alloc_msix(device_t pcib, device_t dev, int *irq);
int pcib_release_msix(device_t pcib, device_t dev, int irq);
+int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data);
#endif
diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
index 0b1b5ade9fd4..13401d219c1b 100644
--- a/sys/dev/pci/pcivar.h
+++ b/sys/dev/pci/pcivar.h
@@ -83,18 +83,33 @@ struct pcicfg_msi {
int msi_alloc; /* Number of allocated messages. */
uint64_t msi_addr; /* Contents of address register. */
uint16_t msi_data; /* Contents of data register. */
+ u_int msi_handlers;
};
/* Interesting values for PCI MSI-X */
+struct msix_vector {
+ uint64_t mv_address; /* Contents of address register. */
+ uint32_t mv_data; /* Contents of data register. */
+ int mv_irq; /* IRQ that this message vector's address/data belong to. */
+};
+
+struct msix_table_entry {
+ u_int mte_vector; /* 1-based index into msix_vectors array. */
+ u_int mte_handlers; /* NOTE(review): presumably a count of handlers set up on this entry -- confirm in pci_setup_intr(). */
+};
+
struct pcicfg_msix {
uint16_t msix_ctrl; /* Message Control */
- uint8_t msix_location; /* Offset of MSI-X capability registers. */
uint16_t msix_msgnum; /* Number of messages */
- int msix_alloc; /* Number of allocated messages. */
+ uint8_t msix_location; /* Offset of MSI-X capability registers. */
uint8_t msix_table_bar; /* BAR containing vector table. */
uint8_t msix_pba_bar; /* BAR containing PBA. */
uint32_t msix_table_offset;
uint32_t msix_pba_offset;
+ int msix_alloc; /* Number of allocated vectors. */
+ int msix_table_len; /* Length of virtual table. */
+ struct msix_table_entry *msix_table; /* Virtual table. */
+ struct msix_vector *msix_vectors; /* Array of allocated vectors. */
struct resource *msix_table_res; /* Resource containing vector table. */
struct resource *msix_pba_res; /* Resource containing PBA. */
};
@@ -403,9 +418,9 @@ pci_alloc_msix(device_t dev, int *count)
}
static __inline int
-pci_remap_msix(device_t dev, u_int *indices)
+pci_remap_msix(device_t dev, int count, const u_int *vectors)
{
- return (PCI_REMAP_MSIX(device_get_parent(dev), dev, indices));
+ return (PCI_REMAP_MSIX(device_get_parent(dev), dev, count, vectors));
}
static __inline int
@@ -429,13 +444,15 @@ pci_msix_count(device_t dev)
device_t pci_find_bsf(uint8_t, uint8_t, uint8_t);
device_t pci_find_device(uint16_t, uint16_t);
-/* Used by MD code to program MSI and MSI-X registers. */
-void pci_enable_msi(device_t dev, uint64_t address, uint16_t data);
-void pci_enable_msix(device_t dev, u_int index, uint64_t address,
- uint32_t data);
-void pci_mask_msix(device_t dev, u_int index);
+/*
+ * Can be used by MD code to request the PCI bus to re-map an MSI or
+ * MSI-X message.
+ */
+int pci_remap_msi_irq(device_t dev, u_int irq);
+
+/* Can be used by drivers to manage the MSI-X table. */
int pci_pending_msix(device_t dev, u_int index);
-void pci_unmask_msix(device_t dev, u_int index);
+
int pci_msi_device_blacklisted(device_t dev);
#endif /* _SYS_BUS_H_ */