Merge branch 'irq/for-block' into irq/core
author Thomas Gleixner <tglx@linutronix.de>
Thu, 15 Sep 2016 18:54:40 +0000 (20:54 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 15 Sep 2016 18:54:40 +0000 (20:54 +0200)
Add the new irq spreading infrastructure.
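For context, a minimal sketch of how a PCI driver is expected to consume this spreading infrastructure once the branch is merged. The driver-side names (foo_dev, foo_irq_handler, foo_setup_irqs, the 1..8 vector range) are made up for illustration; the helpers used (pci_alloc_irq_vectors() with PCI_IRQ_AFFINITY, pci_irq_vector(), and the pci_irq_get_affinity() added in this diff) are the interfaces this series is built around:

  #include <linux/interrupt.h>
  #include <linux/pci.h>

  struct foo_dev {
          struct pci_dev *pdev;
          int nr_vecs;
  };

  static irqreturn_t foo_irq_handler(int irq, void *data)
  {
          /* Per-vector (e.g. per-queue) handling would go here. */
          return IRQ_HANDLED;
  }

  static int foo_setup_irqs(struct foo_dev *foo)
  {
          int i, ret;

          /*
           * Ask for 1..8 MSI or MSI-X vectors and let the core spread
           * them across CPUs (PCI_IRQ_AFFINITY).
           */
          ret = pci_alloc_irq_vectors(foo->pdev, 1, 8,
                          PCI_IRQ_MSI | PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
          if (ret < 0)
                  return ret;
          foo->nr_vecs = ret;

          for (i = 0; i < foo->nr_vecs; i++) {
                  const struct cpumask *mask = pci_irq_get_affinity(foo->pdev, i);

                  /* The mask is what e.g. blk-mq would use to map queues to CPUs. */
                  if (mask)
                          dev_info(&foo->pdev->dev, "vector %d -> CPUs %*pbl\n",
                                   i, cpumask_pr_args(mask));

                  ret = request_irq(pci_irq_vector(foo->pdev, i), foo_irq_handler,
                                    0, "foo", foo);
                  if (ret)
                          goto err;
          }
          return 0;

  err:
          while (--i >= 0)
                  free_irq(pci_irq_vector(foo->pdev, i), foo);
          pci_free_irq_vectors(foo->pdev);
          return ret;
  }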

drivers/pci/msi.c
kernel/irq/irqdesc.c

diff --combined drivers/pci/msi.c
@@@ -19,7 -19,6 +19,7 @@@
  #include <linux/smp.h>
  #include <linux/errno.h>
  #include <linux/io.h>
 +#include <linux/acpi_iort.h>
  #include <linux/slab.h>
  #include <linux/irqdomain.h>
  #include <linux/of_irq.h>
@@@ -550,15 -549,23 +550,23 @@@ error_attrs
        return ret;
  }
  
- static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
+ static struct msi_desc *
+ msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
  {
-       u16 control;
+       struct cpumask *masks = NULL;
        struct msi_desc *entry;
+       u16 control;
+
+               if (affinity) {
+               masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+               if (!masks)
+                       pr_err("Unable to allocate affinity masks, ignoring\n");
+       }
  
        /* MSI Entry Initialization */
-       entry = alloc_msi_entry(&dev->dev);
+       entry = alloc_msi_entry(&dev->dev, nvec, masks);
        if (!entry)
-               return NULL;
+               goto out;
  
        pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
  
        entry->msi_attrib.default_irq   = dev->irq;     /* Save IOAPIC IRQ */
        entry->msi_attrib.multi_cap     = (control & PCI_MSI_FLAGS_QMASK) >> 1;
        entry->msi_attrib.multiple      = ilog2(__roundup_pow_of_two(nvec));
-       entry->nvec_used                = nvec;
-       entry->affinity                 = dev->irq_affinity;
  
        if (control & PCI_MSI_FLAGS_64BIT)
                entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
        if (entry->msi_attrib.maskbit)
                pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
  
+ out:
+       kfree(masks);
        return entry;
  }
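Both MSI paths in this file rely on two helpers from the spreading code this merge brings in (kernel/irq/affinity.c, not part of this combined diff). Their prototypes as inferred from the call sites, with hedged behavior notes:

  /*
   * Assumed declarations (believed to live in <linux/interrupt.h>):
   *
   * irq_create_affinity_masks() returns a kernel-allocated array of nvec
   * cpumasks, one per vector, spread across the CPUs (optionally restricted
   * by the driver-supplied 'affinity' mask); the caller owns the array and
   * frees it with kfree(), as both call sites here do at their 'out:' labels.
   *
   * irq_calc_affinity_vectors() reports how many vectors the spreading code
   * can usefully distribute, so __pci_enable_msi(x)_range() can shrink the
   * request before allocating.
   */
  struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
  int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec);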
  
@@@ -609,7 -616,7 +617,7 @@@ static int msi_verify_entries(struct pc
   * an error, and a positive return value indicates the number of interrupts
   * which could have been allocated.
   */
- static int msi_capability_init(struct pci_dev *dev, int nvec)
+ static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
  {
        struct msi_desc *entry;
        int ret;
  
        pci_msi_set_enable(dev, 0);     /* Disable MSI during set up */
  
-       entry = msi_setup_entry(dev, nvec);
+       entry = msi_setup_entry(dev, nvec, affinity);
        if (!entry)
                return -ENOMEM;
  
@@@ -680,28 -687,29 +688,29 @@@ static void __iomem *msix_map_region(st
  }
  
  static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
-                             struct msix_entry *entries, int nvec)
+                             struct msix_entry *entries, int nvec,
+                             bool affinity)
  {
-       const struct cpumask *mask = NULL;
+       struct cpumask *curmsk, *masks = NULL;
        struct msi_desc *entry;
-       int cpu = -1, i;
-       for (i = 0; i < nvec; i++) {
-               if (dev->irq_affinity) {
-                       cpu = cpumask_next(cpu, dev->irq_affinity);
-                       if (cpu >= nr_cpu_ids)
-                               cpu = cpumask_first(dev->irq_affinity);
-                       mask = cpumask_of(cpu);
-               }
+       int ret, i;
+
+       if (affinity) {
+               masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+               if (!masks)
+                       pr_err("Unable to allocate affinity masks, ignoring\n");
+       }
  
-               entry = alloc_msi_entry(&dev->dev);
+       for (i = 0, curmsk = masks; i < nvec; i++) {
+               entry = alloc_msi_entry(&dev->dev, 1, curmsk);
                if (!entry) {
                        if (!i)
                                iounmap(base);
                        else
                                free_msi_irqs(dev);
                      /* Not enough memory. Don't try again */
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto out;
                }
  
                entry->msi_attrib.is_msix       = 1;
                        entry->msi_attrib.entry_nr = i;
                entry->msi_attrib.default_irq   = dev->irq;
                entry->mask_base                = base;
-               entry->nvec_used                = 1;
-               entry->affinity                 = mask;
  
                list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
+               if (masks)
+                       curmsk++;
        }
+       ret = 0;
+ out:
+       kfree(masks);
-       return 0;
+       return ret;
  }
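Note that both callers kfree(masks) right after setting up the descriptors, so alloc_msi_entry() must make its own copy of the masks it is handed. The helper lives in kernel/irq/msi.c and is not part of this diff; a rough sketch of what the updated version is assumed to look like, which also explains why the explicit nvec_used/affinity assignments were dropped above:

  /* Sketch only; the real helper is in kernel/irq/msi.c. */
  struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
                                   const struct cpumask *affinity)
  {
          struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);

          if (!desc)
                  return NULL;

          INIT_LIST_HEAD(&desc->list);
          desc->dev = dev;
          desc->nvec_used = nvec;
          if (affinity) {
                  /* Private per-descriptor copy: one mask per vector. */
                  desc->affinity = kmemdup(affinity,
                                           nvec * sizeof(*desc->affinity),
                                           GFP_KERNEL);
                  if (!desc->affinity) {
                          kfree(desc);
                          return NULL;
                  }
          }
          return desc;
  }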
  
@@@ -746,8 -756,8 +757,8 @@@ static void msix_program_entries(struc
   * single MSI-X irq. A return of zero indicates the successful setup of
   * requested MSI-X entries with allocated irqs or non-zero for otherwise.
   **/
- static int msix_capability_init(struct pci_dev *dev,
-                               struct msix_entry *entries, int nvec)
+ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+                               int nvec, bool affinity)
  {
        int ret;
        u16 control;
        if (!base)
                return -ENOMEM;
  
-       ret = msix_setup_entries(dev, base, entries, nvec);
+       ret = msix_setup_entries(dev, base, entries, nvec, affinity);
        if (ret)
                return ret;
  
@@@ -942,22 -952,8 +953,8 @@@ int pci_msix_vec_count(struct pci_dev *
  }
  EXPORT_SYMBOL(pci_msix_vec_count);
  
- /**
-  * pci_enable_msix - configure device's MSI-X capability structure
-  * @dev: pointer to the pci_dev data structure of MSI-X device function
-  * @entries: pointer to an array of MSI-X entries (optional)
-  * @nvec: number of MSI-X irqs requested for allocation by device driver
-  *
-  * Setup the MSI-X capability structure of device function with the number
-  * of requested irqs upon its software driver call to request for
-  * MSI-X mode enabled on its hardware device function. A return of zero
-  * indicates the successful configuration of MSI-X capability structure
-  * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
-  * Or a return of > 0 indicates that driver request is exceeding the number
-  * of irqs or MSI-X vectors available. Driver should use the returned value to
-  * re-send its request.
-  **/
- int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+                            int nvec, bool affinity)
  {
        int nr_entries;
        int i, j;
                dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
                return -EINVAL;
        }
-       return msix_capability_init(dev, entries, nvec);
+       return msix_capability_init(dev, entries, nvec, affinity);
+ }
+
+ /**
+  * pci_enable_msix - configure device's MSI-X capability structure
+  * @dev: pointer to the pci_dev data structure of MSI-X device function
+  * @entries: pointer to an array of MSI-X entries (optional)
+  * @nvec: number of MSI-X irqs requested for allocation by device driver
+  *
+  * Setup the MSI-X capability structure of device function with the number
+  * of requested irqs upon its software driver call to request for
+  * MSI-X mode enabled on its hardware device function. A return of zero
+  * indicates the successful configuration of MSI-X capability structure
+  * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+  * Or a return of > 0 indicates that driver request is exceeding the number
+  * of irqs or MSI-X vectors available. Driver should use the returned value to
+  * re-send its request.
+  **/
+ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+ {
+       return __pci_enable_msix(dev, entries, nvec, false);
  }
  EXPORT_SYMBOL(pci_enable_msix);
  
@@@ -1042,6 -1058,7 +1059,7 @@@ EXPORT_SYMBOL(pci_msi_enabled)
  static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
                unsigned int flags)
  {
+       bool affinity = flags & PCI_IRQ_AFFINITY;
        int nvec;
        int rc;
  
                nvec = maxvec;
  
        for (;;) {
-               if (flags & PCI_IRQ_AFFINITY) {
-                       dev->irq_affinity = irq_create_affinity_mask(&nvec);
+               if (affinity) {
+                       nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+                                       nvec);
                        if (nvec < minvec)
                                return -ENOSPC;
                }
  
-               rc = msi_capability_init(dev, nvec);
+               rc = msi_capability_init(dev, nvec, affinity);
                if (rc == 0)
                        return nvec;
  
-               kfree(dev->irq_affinity);
-               dev->irq_affinity = NULL;
                if (rc < 0)
                        return rc;
                if (rc < minvec)
@@@ -1114,26 -1129,24 +1130,24 @@@ static int __pci_enable_msix_range(stru
                struct msix_entry *entries, int minvec, int maxvec,
                unsigned int flags)
  {
-       int nvec = maxvec;
-       int rc;
+       bool affinity = flags & PCI_IRQ_AFFINITY;
+       int rc, nvec = maxvec;
  
        if (maxvec < minvec)
                return -ERANGE;
  
        for (;;) {
-               if (flags & PCI_IRQ_AFFINITY) {
-                       dev->irq_affinity = irq_create_affinity_mask(&nvec);
+               if (affinity) {
+                       nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+                                       nvec);
                        if (nvec < minvec)
                                return -ENOSPC;
                }
  
-               rc = pci_enable_msix(dev, entries, nvec);
+               rc = __pci_enable_msix(dev, entries, nvec, affinity);
                if (rc == 0)
                        return nvec;
  
-               kfree(dev->irq_affinity);
-               dev->irq_affinity = NULL;
                if (rc < 0)
                        return rc;
                if (rc < minvec)
@@@ -1257,6 -1270,37 +1271,37 @@@ int pci_irq_vector(struct pci_dev *dev
  }
  EXPORT_SYMBOL(pci_irq_vector);
  
+ /**
+  * pci_irq_get_affinity - return the affinity of a particular msi vector
+  * @dev:      PCI device to operate on
+  * @nr:               device-relative interrupt vector index (0-based).
+  */
+ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
+ {
+       if (dev->msix_enabled) {
+               struct msi_desc *entry;
+               int i = 0;
+
+               for_each_pci_msi_entry(entry, dev) {
+                       if (i == nr)
+                               return entry->affinity;
+                       i++;
+               }
+               WARN_ON_ONCE(1);
+               return NULL;
+       } else if (dev->msi_enabled) {
+               struct msi_desc *entry = first_pci_msi_entry(dev);
+
+               if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used))
+                       return NULL;
+               return &entry->affinity[nr];
+       } else {
+               return cpu_possible_mask;
+       }
+ }
+ EXPORT_SYMBOL(pci_irq_get_affinity);
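The two branches mirror how the descriptors were set up earlier in this diff: each MSI-X vector gets its own descriptor carrying a single-entry affinity array, while MSI uses one descriptor whose affinity array has nvec_used entries, hence the list walk versus the direct index. A hedged example of the kind of consumer this is aimed at (the function name and the cpu_to_queue array are hypothetical; the pattern simply maps every CPU in a vector's mask to that vector's queue):

  #include <linux/cpumask.h>
  #include <linux/pci.h>

  static void foo_build_queue_map(struct pci_dev *pdev, int nr_vecs,
                                  unsigned int *cpu_to_queue)
  {
          int vec, cpu;

          for (vec = 0; vec < nr_vecs; vec++) {
                  const struct cpumask *mask = pci_irq_get_affinity(pdev, vec);

                  /* Fall back to "any CPU" if no affinity was assigned. */
                  if (!mask)
                          mask = cpu_possible_mask;
                  for_each_cpu(cpu, mask)
                          cpu_to_queue[cpu] = vec;
          }
  }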
+
  struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
  {
        return to_pci_dev(desc->dev);
@@@ -1503,8 -1547,8 +1548,8 @@@ u32 pci_msi_domain_get_msi_rid(struct i
        pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
  
        of_node = irq_domain_get_of_node(domain);
 -      if (of_node)
 -              rid = of_msi_map_rid(&pdev->dev, of_node, rid);
 +      rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) :
 +                      iort_msi_map_rid(&pdev->dev, rid);
  
        return rid;
  }
   */
  struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
  {
 +      struct irq_domain *dom;
        u32 rid = 0;
  
        pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
 -      return of_msi_map_get_device_domain(&pdev->dev, rid);
 +      dom = of_msi_map_get_device_domain(&pdev->dev, rid);
 +      if (!dom)
 +              dom = iort_get_device_domain(&pdev->dev, rid);
 +      return dom;
  }
  #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
diff --combined kernel/irq/irqdesc.c
@@@ -15,7 -15,6 +15,7 @@@
  #include <linux/radix-tree.h>
  #include <linux/bitmap.h>
  #include <linux/irqdomain.h>
 +#include <linux/sysfs.h>
  
  #include "internals.h"
  
@@@ -124,181 -123,6 +124,181 @@@ static DECLARE_BITMAP(allocated_irqs, I
  
  #ifdef CONFIG_SPARSE_IRQ
  
 +static void irq_kobj_release(struct kobject *kobj);
 +
 +#ifdef CONFIG_SYSFS
 +static struct kobject *irq_kobj_base;
 +
 +#define IRQ_ATTR_RO(_name) \
 +static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 +
 +static ssize_t per_cpu_count_show(struct kobject *kobj,
 +                                struct kobj_attribute *attr, char *buf)
 +{
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 +      int cpu, irq = desc->irq_data.irq;
 +      ssize_t ret = 0;
 +      char *p = "";
 +
 +      for_each_possible_cpu(cpu) {
 +              unsigned int c = kstat_irqs_cpu(irq, cpu);
 +
 +              ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
 +              p = ",";
 +      }
 +
 +      ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
 +      return ret;
 +}
 +IRQ_ATTR_RO(per_cpu_count);
 +
 +static ssize_t chip_name_show(struct kobject *kobj,
 +                            struct kobj_attribute *attr, char *buf)
 +{
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 +      ssize_t ret = 0;
 +
 +      raw_spin_lock_irq(&desc->lock);
 +      if (desc->irq_data.chip && desc->irq_data.chip->name) {
 +              ret = scnprintf(buf, PAGE_SIZE, "%s\n",
 +                              desc->irq_data.chip->name);
 +      }
 +      raw_spin_unlock_irq(&desc->lock);
 +
 +      return ret;
 +}
 +IRQ_ATTR_RO(chip_name);
 +
 +static ssize_t hwirq_show(struct kobject *kobj,
 +                        struct kobj_attribute *attr, char *buf)
 +{
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 +      ssize_t ret = 0;
 +
 +      raw_spin_lock_irq(&desc->lock);
 +      if (desc->irq_data.domain)
 +              ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq);
 +      raw_spin_unlock_irq(&desc->lock);
 +
 +      return ret;
 +}
 +IRQ_ATTR_RO(hwirq);
 +
 +static ssize_t type_show(struct kobject *kobj,
 +                       struct kobj_attribute *attr, char *buf)
 +{
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 +      ssize_t ret = 0;
 +
 +      raw_spin_lock_irq(&desc->lock);
 +      ret = sprintf(buf, "%s\n",
 +                    irqd_is_level_type(&desc->irq_data) ? "level" : "edge");
 +      raw_spin_unlock_irq(&desc->lock);
 +
 +      return ret;
 +
 +}
 +IRQ_ATTR_RO(type);
 +
 +static ssize_t name_show(struct kobject *kobj,
 +                       struct kobj_attribute *attr, char *buf)
 +{
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 +      ssize_t ret = 0;
 +
 +      raw_spin_lock_irq(&desc->lock);
 +      if (desc->name)
 +              ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name);
 +      raw_spin_unlock_irq(&desc->lock);
 +
 +      return ret;
 +}
 +IRQ_ATTR_RO(name);
 +
 +static ssize_t actions_show(struct kobject *kobj,
 +                          struct kobj_attribute *attr, char *buf)
 +{
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 +      struct irqaction *action;
 +      ssize_t ret = 0;
 +      char *p = "";
 +
 +      raw_spin_lock_irq(&desc->lock);
 +      for (action = desc->action; action != NULL; action = action->next) {
 +              ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
 +                               p, action->name);
 +              p = ",";
 +      }
 +      raw_spin_unlock_irq(&desc->lock);
 +
 +      if (ret)
 +              ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
 +
 +      return ret;
 +}
 +IRQ_ATTR_RO(actions);
 +
 +static struct attribute *irq_attrs[] = {
 +      &per_cpu_count_attr.attr,
 +      &chip_name_attr.attr,
 +      &hwirq_attr.attr,
 +      &type_attr.attr,
 +      &name_attr.attr,
 +      &actions_attr.attr,
 +      NULL
 +};
 +
 +static struct kobj_type irq_kobj_type = {
 +      .release        = irq_kobj_release,
 +      .sysfs_ops      = &kobj_sysfs_ops,
 +      .default_attrs  = irq_attrs,
 +};
 +
 +static void irq_sysfs_add(int irq, struct irq_desc *desc)
 +{
 +      if (irq_kobj_base) {
 +              /*
 +               * Continue even in case of failure as this is nothing
 +               * crucial.
 +               */
 +              if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq))
 +                      pr_warn("Failed to add kobject for irq %d\n", irq);
 +      }
 +}
 +
 +static int __init irq_sysfs_init(void)
 +{
 +      struct irq_desc *desc;
 +      int irq;
 +
 +      /* Prevent concurrent irq alloc/free */
 +      irq_lock_sparse();
 +
 +      irq_kobj_base = kobject_create_and_add("irq", kernel_kobj);
 +      if (!irq_kobj_base) {
 +              irq_unlock_sparse();
 +              return -ENOMEM;
 +      }
 +
 +      /* Add the already allocated interrupts */
 +      for_each_irq_desc(irq, desc)
 +              irq_sysfs_add(irq, desc);
 +      irq_unlock_sparse();
 +
 +      return 0;
 +}
 +postcore_initcall(irq_sysfs_init);
 +
 +#else /* !CONFIG_SYSFS */
 +
 +static struct kobj_type irq_kobj_type = {
 +      .release        = irq_kobj_release,
 +};
 +
 +static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
 +
 +#endif /* CONFIG_SYSFS */
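For reference, the sysfs additions above give every allocated interrupt a read-only directory under /sys/kernel/irq/ (irq_sysfs_init() registers the "irq" kobject on kernel_kobj, and irq_sysfs_add() adds one child per descriptor). Based on the show functions above, the resulting layout should look roughly like:

  /sys/kernel/irq/<N>/actions         comma-separated names of the registered handlers
  /sys/kernel/irq/<N>/chip_name       name of the irq chip, if one is set
  /sys/kernel/irq/<N>/hwirq           hardware irq number, if the irq has a domain
  /sys/kernel/irq/<N>/name            descriptor name (desc->name), if set
  /sys/kernel/irq/<N>/per_cpu_count   comma-separated count for each possible CPU
  /sys/kernel/irq/<N>/type            "level" or "edge"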
 +
  static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
  
  static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@@ -363,7 -187,6 +363,7 @@@ static struct irq_desc *alloc_desc(int 
  
        desc_set_defaults(irq, desc, node, affinity, owner);
        irqd_set(&desc->irq_data, flags);
 +      kobject_init(&desc->kobj, &irq_kobj_type);
  
        return desc;
  
@@@ -374,22 -197,15 +374,22 @@@ err_desc
        return NULL;
  }
  
 -static void delayed_free_desc(struct rcu_head *rhp)
 +static void irq_kobj_release(struct kobject *kobj)
  {
 -      struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
 +      struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
  
        free_masks(desc);
        free_percpu(desc->kstat_irqs);
        kfree(desc);
  }
  
 +static void delayed_free_desc(struct rcu_head *rhp)
 +{
 +      struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
 +
 +      kobject_put(&desc->kobj);
 +}
 +
  static void free_desc(unsigned int irq)
  {
        struct irq_desc *desc = irq_to_desc(irq);
         * kstat_irq_usr(). Once we deleted the descriptor from the
         * sparse tree we can free it. Access in proc will fail to
         * lookup the descriptor.
 +       *
 +       * The sysfs entry must be serialized against a concurrent
 +       * irq_sysfs_init() as well.
         */
        mutex_lock(&sparse_irq_lock);
 +      kobject_del(&desc->kobj);
        delete_irq_desc(irq);
        mutex_unlock(&sparse_irq_lock);
  
@@@ -424,32 -236,30 +424,31 @@@ static int alloc_descs(unsigned int sta
        const struct cpumask *mask = NULL;
        struct irq_desc *desc;
        unsigned int flags;
-       int i, cpu = -1;
+       int i;
  
-       if (affinity && cpumask_empty(affinity))
-               return -EINVAL;
+       /* Validate affinity mask(s) */
+       if (affinity) {
+               for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+                       if (cpumask_empty(mask))
+                               return -EINVAL;
+               }
+       }
  
        flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+       mask = NULL;
  
        for (i = 0; i < cnt; i++) {
                if (affinity) {
-                       cpu = cpumask_next(cpu, affinity);
-                       if (cpu >= nr_cpu_ids)
-                               cpu = cpumask_first(affinity);
-                       node = cpu_to_node(cpu);
-                       /*
-                        * For single allocations we use the caller provided
-                        * mask otherwise we use the mask of the target cpu
-                        */
-                       mask = cnt == 1 ? affinity : cpumask_of(cpu);
+                       node = cpu_to_node(cpumask_first(affinity));
+                       mask = affinity;
+                       affinity++;
                }
                desc = alloc_desc(start + i, node, flags, mask, owner);
                if (!desc)
                        goto err;
                mutex_lock(&sparse_irq_lock);
                irq_insert_desc(start + i, desc);
 +              irq_sysfs_add(start + i, desc);
                mutex_unlock(&sparse_irq_lock);
        }
        return start;
@@@ -670,9 -480,9 +669,9 @@@ EXPORT_SYMBOL_GPL(irq_free_descs)
   * @cnt:      Number of consecutive irqs to allocate.
   * @node:     Preferred node on which the irq descriptor should be allocated
   * @owner:    Owning module (can be NULL)
-  * @affinity: Optional pointer to an affinity mask which hints where the
-  *            irq descriptors should be allocated and which default
-  *            affinities to use
+  * @affinity: Optional pointer to an affinity mask array of size @cnt which
+  *            hints where the irq descriptors should be allocated and which
+  *            default affinities to use
   *
   * Returns the first irq number or error code
   */