drivers/edac/edac_mc.c

   1 /*
   2  * edac_mc kernel module
   3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4  * This file may be distributed under the terms of the
   5  * GNU General Public License.
   6  *
   7  * Written by Thayne Harbaugh
   8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9  *      http://www.anime.net/~goemon/linux-ecc/
  10  *
  11  * Modified by Dave Peterson and Doug Thompson
  12  *
  13  */
  14
  15 #include <linux/module.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/kernel.h>
  18 #include <linux/types.h>
  19 #include <linux/smp.h>
  20 #include <linux/init.h>
  21 #include <linux/sysctl.h>
  22 #include <linux/highmem.h>
  23 #include <linux/timer.h>
  24 #include <linux/slab.h>
  25 #include <linux/jiffies.h>
  26 #include <linux/spinlock.h>
  27 #include <linux/list.h>
  28 #include <linux/ctype.h>
  29 #include <linux/edac.h>
  30 #include <linux/bitops.h>
  31 #include <asm/uaccess.h>
  32 #include <asm/page.h>
  33 #include <asm/edac.h>
  34 #include "edac_core.h"
  35 #include "edac_module.h"
  36
  37 #define CREATE_TRACE_POINTS
  38 #define TRACE_INCLUDE_PATH ../../include/ras
  39 #include <ras/ras_event.h>
  40
/* lock protecting the global list of memory controllers (mc_devices) */
  42 static DEFINE_MUTEX(mem_ctls_mutex);
  43 static LIST_HEAD(mc_devices);
  44
  45 #ifdef CONFIG_EDAC_DEBUG
  46
  47 static void edac_mc_dump_channel(struct rank_info *chan)
  48 {
  49         debugf4("\tchannel = %p\n", chan);
  50         debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
  51         debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
  52         debugf4("\tchannel->dimm = %p\n", chan->dimm);
  53 }
  54
  55 static void edac_mc_dump_dimm(struct dimm_info *dimm)
  56 {
  57         int i;
  58
  59         debugf4("\tdimm = %p\n", dimm);
  60         debugf4("\tdimm->label = '%s'\n", dimm->label);
  61         debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
  62         debugf4("\tdimm location ");
  63         for (i = 0; i < dimm->mci->n_layers; i++) {
  64                 printk(KERN_CONT "%d", dimm->location[i]);
  65                 if (i < dimm->mci->n_layers - 1)
  66                         printk(KERN_CONT ".");
  67         }
  68         printk(KERN_CONT "\n");
  69         debugf4("\tdimm->grain = %d\n", dimm->grain);
  70         debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
  71 }
  72
  73 static void edac_mc_dump_csrow(struct csrow_info *csrow)
  74 {
  75         debugf4("\tcsrow = %p\n", csrow);
  76         debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
  77         debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
  78         debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
  79         debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
  80         debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
  81         debugf4("\tcsrow->channels = %p\n", csrow->channels);
  82         debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
  83 }
  84
  85 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
  86 {
  87         debugf3("\tmci = %p\n", mci);
  88         debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
  89         debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
  90         debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
  91         debugf4("\tmci->edac_check = %p\n", mci->edac_check);
  92         debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
  93                 mci->nr_csrows, mci->csrows);
  94         debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
  95                 mci->tot_dimms, mci->dimms);
  96         debugf3("\tdev = %p\n", mci->pdev);
  97         debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
  98         debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
  99 }
 100
 101 #endif                          /* CONFIG_EDAC_DEBUG */
 102
 103 /*
 104  * keep those in sync with the enum mem_type
 105  */
 106 const char *edac_mem_types[] = {
 107         "Empty csrow",
 108         "Reserved csrow type",
 109         "Unknown csrow type",
 110         "Fast page mode RAM",
 111         "Extended data out RAM",
 112         "Burst Extended data out RAM",
 113         "Single data rate SDRAM",
 114         "Registered single data rate SDRAM",
 115         "Double data rate SDRAM",
 116         "Registered Double data rate SDRAM",
 117         "Rambus DRAM",
 118         "Unbuffered DDR2 RAM",
 119         "Fully buffered DDR2",
 120         "Registered DDR2 RAM",
 121         "Rambus XDR",
 122         "Unbuffered DDR3 RAM",
 123         "Registered DDR3 RAM",
 124 };
 125 EXPORT_SYMBOL_GPL(edac_mem_types);
 126
 127 /**
 128  * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 129  * @p:          pointer to a pointer with the memory offset to be used. At
 130  *              return, this will be incremented to point to the next offset
 131  * @size:       Size of the data structure to be reserved
 132  * @n_elems:    Number of elements that should be reserved
 133  *
 134  * If 'size' is a constant, the compiler will optimize this whole function
 135  * down to either a no-op or the addition of a constant to the value of '*p'.
 136  *
 137  * The 'p' pointer is absolutely needed to keep the proper advancing
 138  * further in memory to the proper offsets when allocating the struct along
 139  * with its embedded structs, as edac_device_alloc_ctl_info() does it
 140  * above, for example.
 141  *
 142  * At return, the pointer 'p' will be incremented to be used on a next call
 143  * to this function.
 144  */
 145 void *edac_align_ptr(void **p, unsigned size, int n_elems)
 146 {
 147         unsigned align, r;
 148         void *ptr = *p;
 149
 150         *p += size * n_elems;
 151
 152         /*
 153          * 'p' can possibly be an unaligned item X such that sizeof(X) is
 154          * 'size'.  Adjust 'p' so that its alignment is at least as
 155          * stringent as what the compiler would provide for X and return
 156          * the aligned result.
 157          * Here we assume that the alignment of a "long long" is the most
 158          * stringent alignment that the compiler will ever provide by default.
 159          * As far as I know, this is a reasonable assumption.
 160          */
 161         if (size > sizeof(long))
 162                 align = sizeof(long long);
 163         else if (size > sizeof(int))
 164                 align = sizeof(long);
 165         else if (size > sizeof(short))
 166                 align = sizeof(int);
 167         else if (size > sizeof(char))
 168                 align = sizeof(short);
 169         else
 170                 return (char *)ptr;
 171
 172         r = size % align;
 173
 174         if (r == 0)
 175                 return (char *)ptr;
 176
 177         *p += align - r;
 178
 179         return (void *)(((unsigned long)ptr) + align - r);
 180 }
 181
 182 /**
 183  * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 184  * @mc_num:             Memory controller number
 185  * @n_layers:           Number of MC hierarchy layers
 186  * layers:              Describes each layer as seen by the Memory Controller
 187  * @size_pvt:           size of private storage needed
 188  *
 189  *
 190  * Everything is kmalloc'ed as one big chunk - more efficient.
 191  * Only can be used if all structures have the same lifetime - otherwise
 192  * you have to allocate and initialize your own structures.
 193  *
 194  * Use edac_mc_free() to free mc structures allocated by this function.
 195  *
 196  * NOTE: drivers handle multi-rank memories in different ways: in some
 197  * drivers, one multi-rank memory stick is mapped as one entry, while, in
 198  * others, a single multi-rank memory stick would be mapped into several
 199  * entries. Currently, this function will allocate multiple struct dimm_info
 200  * on such scenarios, as grouping the multiple ranks require drivers change.
 201  *
 202  * Returns:
 203  *      On failure: NULL
 204  *      On success: struct mem_ctl_info pointer
 205  */
 206 struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 207                                    unsigned n_layers,
 208                                    struct edac_mc_layer *layers,
 209                                    unsigned sz_pvt)
 210 {
 211         struct mem_ctl_info *mci;
 212         struct edac_mc_layer *layer;
 213         struct csrow_info *csr;
 214         struct rank_info *chan;
 215         struct dimm_info *dimm;
 216         u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 217         unsigned pos[EDAC_MAX_LAYERS];
 218         unsigned size, tot_dimms = 1, count = 1;
 219         unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 220         void *pvt, *p, *ptr = NULL;
 221         int i, j, row, chn, n, len, off;
 222         bool per_rank = false;
 223
 224         BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
 225         /*
 226          * Calculate the total amount of dimms and csrows/cschannels while
 227          * in the old API emulation mode
 228          */
 229         for (i = 0; i < n_layers; i++) {
 230                 tot_dimms *= layers[i].size;
 231                 if (layers[i].is_virt_csrow)
 232                         tot_csrows *= layers[i].size;
 233                 else
 234                         tot_channels *= layers[i].size;
 235
 236                 if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
 237                         per_rank = true;
 238         }
 239
 240         /* Figure out the offsets of the various items from the start of an mc
 241          * structure.  We want the alignment of each item to be at least as
 242          * stringent as what the compiler would provide if we could simply
 243          * hardcode everything into a single struct.
 244          */
 245         mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
 246         layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
 247         for (i = 0; i < n_layers; i++) {
 248                 count *= layers[i].size;
 249                 debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
 250                 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 251                 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 252                 tot_errcount += 2 * count;
 253         }
 254
 255         debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
 256         pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 257         size = ((unsigned long)pvt) + sz_pvt;
 258
 259         debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
 260                 __func__, size,
 261                 tot_dimms,
 262                 per_rank ? "ranks" : "dimms",
 263                 tot_csrows * tot_channels);
 264
 265         mci = kzalloc(size, GFP_KERNEL);
 266         if (mci == NULL)
 267                 return NULL;
 268
 269         /* Adjust pointers so they point within the memory we just allocated
 270          * rather than an imaginary chunk of memory located at address 0.
 271          */
 272         layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
 273         for (i = 0; i < n_layers; i++) {
 274                 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
 275                 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
 276         }
 277         pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 278
 279         /* setup index and various internal pointers */
 280         mci->mc_idx = mc_num;
 281         mci->tot_dimms = tot_dimms;
 282         mci->pvt_info = pvt;
 283         mci->n_layers = n_layers;
 284         mci->layers = layer;
 285         memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
 286         mci->nr_csrows = tot_csrows;
 287         mci->num_cschannel = tot_channels;
 288         mci->mem_is_per_rank = per_rank;
 289
 290         /*
 291          * Alocate and fill the csrow/channels structs
 292          */
 293         mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL);
 294         if (!mci->csrows)
 295                 goto error;
 296         for (row = 0; row < tot_csrows; row++) {
 297                 csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
 298                 if (!csr)
 299                         goto error;
 300                 mci->csrows[row] = csr;
 301                 csr->csrow_idx = row;
 302                 csr->mci = mci;
 303                 csr->nr_channels = tot_channels;
 304                 csr->channels = kcalloc(sizeof(*csr->channels), tot_channels,
 305                                         GFP_KERNEL);
 306                 if (!csr->channels)
 307                         goto error;
 308
 309                 for (chn = 0; chn < tot_channels; chn++) {
 310                         chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
 311                         if (!chan)
 312                                 goto error;
 313                         csr->channels[chn] = chan;
 314                         chan->chan_idx = chn;
 315                         chan->csrow = csr;
 316                 }
 317         }
 318
 319         /*
 320          * Allocate and fill the dimm structs
 321          */
 322         mci->dimms  = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL);
 323         if (!mci->dimms)
 324                 goto error;
 325
 326         memset(&pos, 0, sizeof(pos));
 327         row = 0;
 328         chn = 0;
 329         debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
 330                 per_rank ? "ranks" : "dimms");
 331         for (i = 0; i < tot_dimms; i++) {
 332                 chan = mci->csrows[row]->channels[chn];
 333                 off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
 334                 if (off < 0 || off >= tot_dimms) {
 335                         edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
 336                         goto error;
 337                 }
 338
 339                 dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
 340                 mci->dimms[off] = dimm;
 341                 dimm->mci = mci;
 342
 343                 debugf2("%s: %d: %s%i (%d:%d:%d): row %d, chan %d\n", __func__,
 344                         i, per_rank ? "rank" : "dimm", off,
 345                         pos[0], pos[1], pos[2], row, chn);
 346
 347                 /*
 348                  * Copy DIMM location and initialize it.
 349                  */
 350                 len = sizeof(dimm->label);
 351                 p = dimm->label;
 352                 n = snprintf(p, len, "mc#%u", mc_num);
 353                 p += n;
 354                 len -= n;
 355                 for (j = 0; j < n_layers; j++) {
 356                         n = snprintf(p, len, "%s#%u",
 357                                      edac_layer_name[layers[j].type],
 358                                      pos[j]);
 359                         p += n;
 360                         len -= n;
 361                         dimm->location[j] = pos[j];
 362
 363                         if (len <= 0)
 364                                 break;
 365                 }
 366
 367                 /* Link it to the csrows old API data */
 368                 chan->dimm = dimm;
 369                 dimm->csrow = row;
 370                 dimm->cschannel = chn;
 371
 372                 /* Increment csrow location */
 373                 row++;
 374                 if (row == tot_csrows) {
 375                         row = 0;
 376                         chn++;
 377                 }
 378
 379                 /* Increment dimm location */
 380                 for (j = n_layers - 1; j >= 0; j--) {
 381                         pos[j]++;
 382                         if (pos[j] < layers[j].size)
 383                                 break;
 384                         pos[j] = 0;
 385                 }
 386         }
 387
 388         mci->op_state = OP_ALLOC;
 389
 390         /* at this point, the root kobj is valid, and in order to
 391          * 'free' the object, then the function:
 392          *      edac_mc_unregister_sysfs_main_kobj() must be called
 393          * which will perform kobj unregistration and the actual free
 394          * will occur during the kobject callback operation
 395          */
 396
 397         return mci;
 398
 399 error:
 400         if (mci->dimms) {
 401                 for (i = 0; i < tot_dimms; i++)
 402                         kfree(mci->dimms[i]);
 403                 kfree(mci->dimms);
 404         }
 405         if (mci->csrows) {
 406                 for (chn = 0; chn < tot_channels; chn++) {
 407                         csr = mci->csrows[chn];
 408                         if (csr) {
 409                                 for (chn = 0; chn < tot_channels; chn++)
 410                                         kfree(csr->channels[chn]);
 411                                 kfree(csr);
 412                         }
 413                         kfree(mci->csrows[i]);
 414                 }
 415                 kfree(mci->csrows);
 416         }
 417         kfree(mci);
 418
 419         return NULL;
 420 }
 421 EXPORT_SYMBOL_GPL(edac_mc_alloc);
 422
 423 /**
 424  * edac_mc_free
 425  *      'Free' a previously allocated 'mci' structure
 426  * @mci: pointer to a struct mem_ctl_info structure
 427  */
 428 void edac_mc_free(struct mem_ctl_info *mci)
 429 {
 430         debugf1("%s()\n", __func__);
 431
 432         /* the mci instance is freed here, when the sysfs object is dropped */
 433         edac_unregister_sysfs(mci);
 434 }
 435 EXPORT_SYMBOL_GPL(edac_mc_free);
 436
 437
 438 /**
 439  * find_mci_by_dev
 440  *
 441  *      scan list of controllers looking for the one that manages
 442  *      the 'dev' device
 443  * @dev: pointer to a struct device related with the MCI
 444  */
 445 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 446 {
 447         struct mem_ctl_info *mci;
 448         struct list_head *item;
 449
 450         debugf3("%s()\n", __func__);
 451
 452         list_for_each(item, &mc_devices) {
 453                 mci = list_entry(item, struct mem_ctl_info, link);
 454
 455                 if (mci->pdev == dev)
 456                         return mci;
 457         }
 458
 459         return NULL;
 460 }
 461 EXPORT_SYMBOL_GPL(find_mci_by_dev);
 462
 463 /*
 464  * handler for EDAC to check if NMI type handler has asserted interrupt
 465  */
 466 static int edac_mc_assert_error_check_and_clear(void)
 467 {
 468         int old_state;
 469
 470         if (edac_op_state == EDAC_OPSTATE_POLL)
 471                 return 1;
 472
 473         old_state = edac_err_assert;
 474         edac_err_assert = 0;
 475
 476         return old_state;
 477 }
 478
 479 /*
 480  * edac_mc_workq_function
 481  *      performs the operation scheduled by a workq request
 482  */
 483 static void edac_mc_workq_function(struct work_struct *work_req)
 484 {
 485         struct delayed_work *d_work = to_delayed_work(work_req);
 486         struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 487
 488         mutex_lock(&mem_ctls_mutex);
 489
 490         /* if this control struct has movd to offline state, we are done */
 491         if (mci->op_state == OP_OFFLINE) {
 492                 mutex_unlock(&mem_ctls_mutex);
 493                 return;
 494         }
 495
 496         /* Only poll controllers that are running polled and have a check */
 497         if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 498                 mci->edac_check(mci);
 499
 500         mutex_unlock(&mem_ctls_mutex);
 501
 502         /* Reschedule */
 503         queue_delayed_work(edac_workqueue, &mci->work,
 504                         msecs_to_jiffies(edac_mc_get_poll_msec()));
 505 }
 506
 507 /*
 508  * edac_mc_workq_setup
 509  *      initialize a workq item for this mci
 510  *      passing in the new delay period in msec
 511  *
 512  *      locking model:
 513  *
 514  *              called with the mem_ctls_mutex held
 515  */
 516 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 517 {
 518         debugf0("%s()\n", __func__);
 519
 520         /* if this instance is not in the POLL state, then simply return */
 521         if (mci->op_state != OP_RUNNING_POLL)
 522                 return;
 523
 524         INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 525         queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 526 }
 527
 528 /*
 529  * edac_mc_workq_teardown
 530  *      stop the workq processing on this mci
 531  *
 532  *      locking model:
 533  *
 534  *              called WITHOUT lock held
 535  */
 536 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 537 {
 538         int status;
 539
 540         if (mci->op_state != OP_RUNNING_POLL)
 541                 return;
 542
 543         status = cancel_delayed_work(&mci->work);
 544         if (status == 0) {
 545                 debugf0("%s() not canceled, flush the queue\n",
 546                         __func__);
 547
 548                 /* workq instance might be running, wait for it */
 549                 flush_workqueue(edac_workqueue);
 550         }
 551 }
 552
 553 /*
 554  * edac_mc_reset_delay_period(unsigned long value)
 555  *
 556  *      user space has updated our poll period value, need to
 557  *      reset our workq delays
 558  */
 559 void edac_mc_reset_delay_period(int value)
 560 {
 561         struct mem_ctl_info *mci;
 562         struct list_head *item;
 563
 564         mutex_lock(&mem_ctls_mutex);
 565
 566         /* scan the list and turn off all workq timers, doing so under lock
 567          */
 568         list_for_each(item, &mc_devices) {
 569                 mci = list_entry(item, struct mem_ctl_info, link);
 570
 571                 if (mci->op_state == OP_RUNNING_POLL)
 572                         cancel_delayed_work(&mci->work);
 573         }
 574
 575         mutex_unlock(&mem_ctls_mutex);
 576
 577
 578         /* re-walk the list, and reset the poll delay */
 579         mutex_lock(&mem_ctls_mutex);
 580
 581         list_for_each(item, &mc_devices) {
 582                 mci = list_entry(item, struct mem_ctl_info, link);
 583
 584                 edac_mc_workq_setup(mci, (unsigned long) value);
 585         }
 586
 587         mutex_unlock(&mem_ctls_mutex);
 588 }
 589
 590
 591
 592 /* Return 0 on success, 1 on failure.
 593  * Before calling this function, caller must
 594  * assign a unique value to mci->mc_idx.
 595  *
 596  *      locking model:
 597  *
 598  *              called with the mem_ctls_mutex lock held
 599  */
 600 static int add_mc_to_global_list(struct mem_ctl_info *mci)
 601 {
 602         struct list_head *item, *insert_before;
 603         struct mem_ctl_info *p;
 604
 605         insert_before = &mc_devices;
 606
 607         p = find_mci_by_dev(mci->pdev);
 608         if (unlikely(p != NULL))
 609                 goto fail0;
 610
 611         list_for_each(item, &mc_devices) {
 612                 p = list_entry(item, struct mem_ctl_info, link);
 613
 614                 if (p->mc_idx >= mci->mc_idx) {
 615                         if (unlikely(p->mc_idx == mci->mc_idx))
 616                                 goto fail1;
 617
 618                         insert_before = item;
 619                         break;
 620                 }
 621         }
 622
 623         list_add_tail_rcu(&mci->link, insert_before);
 624         atomic_inc(&edac_handlers);
 625         return 0;
 626
 627 fail0:
 628         edac_printk(KERN_WARNING, EDAC_MC,
 629                 "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
 630                 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 631         return 1;
 632
 633 fail1:
 634         edac_printk(KERN_WARNING, EDAC_MC,
 635                 "bug in low-level driver: attempt to assign\n"
 636                 "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 637         return 1;
 638 }
 639
 640 static void del_mc_from_global_list(struct mem_ctl_info *mci)
 641 {
 642         atomic_dec(&edac_handlers);
 643         list_del_rcu(&mci->link);
 644
 645         /* these are for safe removal of devices from global list while
 646          * NMI handlers may be traversing list
 647          */
 648         synchronize_rcu();
 649         INIT_LIST_HEAD(&mci->link);
 650 }
 651
 652 /**
 653  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 654  *
 655  * If found, return a pointer to the structure.
 656  * Else return NULL.
 657  *
 658  * Caller must hold mem_ctls_mutex.
 659  */
 660 struct mem_ctl_info *edac_mc_find(int idx)
 661 {
 662         struct list_head *item;
 663         struct mem_ctl_info *mci;
 664
 665         list_for_each(item, &mc_devices) {
 666                 mci = list_entry(item, struct mem_ctl_info, link);
 667
 668                 if (mci->mc_idx >= idx) {
 669                         if (mci->mc_idx == idx)
 670                                 return mci;
 671
 672                         break;
 673                 }
 674         }
 675
 676         return NULL;
 677 }
 678 EXPORT_SYMBOL(edac_mc_find);
 679
 680 /**
 681  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 682  *                 create sysfs entries associated with mci structure
 683  * @mci: pointer to the mci structure to be added to the list
 684  *
 685  * Return:
 686  *      0       Success
 687  *      !0      Failure
 688  */
 689
 690 /* FIXME - should a warning be printed if no error detection? correction? */
 691 int edac_mc_add_mc(struct mem_ctl_info *mci)
 692 {
 693         debugf0("%s()\n", __func__);
 694
 695 #ifdef CONFIG_EDAC_DEBUG
 696         if (edac_debug_level >= 3)
 697                 edac_mc_dump_mci(mci);
 698
 699         if (edac_debug_level >= 4) {
 700                 int i;
 701
 702                 for (i = 0; i < mci->nr_csrows; i++) {
 703                         int j;
 704
 705                         edac_mc_dump_csrow(mci->csrows[i]);
 706                         for (j = 0; j < mci->csrows[i]->nr_channels; j++)
 707                                 edac_mc_dump_channel(mci->csrows[i]->channels[j]);
 708                 }
 709                 for (i = 0; i < mci->tot_dimms; i++)
 710                         edac_mc_dump_dimm(mci->dimms[i]);
 711         }
 712 #endif
 713         mutex_lock(&mem_ctls_mutex);
 714
 715         if (add_mc_to_global_list(mci))
 716                 goto fail0;
 717
 718         /* set load time so that error rate can be tracked */
 719         mci->start_time = jiffies;
 720
 721         if (edac_create_sysfs_mci_device(mci)) {
 722                 edac_mc_printk(mci, KERN_WARNING,
 723                         "failed to create sysfs device\n");
 724                 goto fail1;
 725         }
 726
 727         /* If there IS a check routine, then we are running POLLED */
 728         if (mci->edac_check != NULL) {
 729                 /* This instance is NOW RUNNING */
 730                 mci->op_state = OP_RUNNING_POLL;
 731
 732                 edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
 733         } else {
 734                 mci->op_state = OP_RUNNING_INTERRUPT;
 735         }
 736
 737         /* Report action taken */
 738         edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 739                 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 740
 741         mutex_unlock(&mem_ctls_mutex);
 742         return 0;
 743
 744 fail1:
 745         del_mc_from_global_list(mci);
 746
 747 fail0:
 748         mutex_unlock(&mem_ctls_mutex);
 749         return 1;
 750 }
 751 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 752
 753 /**
 754  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 755  *                 remove mci structure from global list
 756  * @pdev: Pointer to 'struct device' representing mci structure to remove.
 757  *
 758  * Return pointer to removed mci structure, or NULL if device not found.
 759  */
 760 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 761 {
 762         struct mem_ctl_info *mci;
 763
 764         debugf0("%s()\n", __func__);
 765
 766         mutex_lock(&mem_ctls_mutex);
 767
 768         /* find the requested mci struct in the global list */
 769         mci = find_mci_by_dev(dev);
 770         if (mci == NULL) {
 771                 mutex_unlock(&mem_ctls_mutex);
 772                 return NULL;
 773         }
 774
 775         del_mc_from_global_list(mci);
 776         mutex_unlock(&mem_ctls_mutex);
 777
 778         /* flush workq processes */
 779         edac_mc_workq_teardown(mci);
 780
 781         /* marking MCI offline */
 782         mci->op_state = OP_OFFLINE;
 783
 784         /* remove from sysfs */
 785         edac_remove_sysfs_mci_device(mci);
 786
 787         edac_printk(KERN_INFO, EDAC_MC,
 788                 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
 789                 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 790
 791         return mci;
 792 }
 793 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
 794
 795 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 796                                 u32 size)
 797 {
 798         struct page *pg;
 799         void *virt_addr;
 800         unsigned long flags = 0;
 801
 802         debugf3("%s()\n", __func__);
 803
 804         /* ECC error page was not in our memory. Ignore it. */
 805         if (!pfn_valid(page))
 806                 return;
 807
 808         /* Find the actual page structure then map it and fix */
 809         pg = pfn_to_page(page);
 810
 811         if (PageHighMem(pg))
 812                 local_irq_save(flags);
 813
 814         virt_addr = kmap_atomic(pg);
 815
 816         /* Perform architecture specific atomic scrub operation */
 817         atomic_scrub(virt_addr + offset, size);
 818
 819         /* Unmap and complete */
 820         kunmap_atomic(virt_addr);
 821
 822         if (PageHighMem(pg))
 823                 local_irq_restore(flags);
 824 }
 825
 826 /* FIXME - should return -1 */
 827 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 828 {
 829         struct csrow_info **csrows = mci->csrows;
 830         int row, i, j, n;
 831
 832         debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 833         row = -1;
 834
 835         for (i = 0; i < mci->nr_csrows; i++) {
 836                 struct csrow_info *csrow = csrows[i];
 837                 n = 0;
 838                 for (j = 0; j < csrow->nr_channels; j++) {
 839                         struct dimm_info *dimm = csrow->channels[j]->dimm;
 840                         n += dimm->nr_pages;
 841                 }
 842                 if (n == 0)
 843                         continue;
 844
 845                 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
 846                         "mask(0x%lx)\n", mci->mc_idx, __func__,
 847                         csrow->first_page, page, csrow->last_page,
 848                         csrow->page_mask);
 849
 850                 if ((page >= csrow->first_page) &&
 851                     (page <= csrow->last_page) &&
 852                     ((page & csrow->page_mask) ==
 853                      (csrow->first_page & csrow->page_mask))) {
 854                         row = i;
 855                         break;
 856                 }
 857         }
 858
 859         if (row == -1)
 860                 edac_mc_printk(mci, KERN_ERR,
 861                         "could not look up page error address %lx\n",
 862                         (unsigned long)page);
 863
 864         return row;
 865 }
 866 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 867
/*
 * Printable name of each memory layer type, indexed by the
 * EDAC_MC_LAYER_* value. edac_mc_handle_error() looks entries up with a
 * layer's .type field when it builds log messages.
 */
const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);
 875
 876 static void edac_inc_ce_error(struct mem_ctl_info *mci,
 877                                     bool enable_per_layer_report,
 878                                     const int pos[EDAC_MAX_LAYERS])
 879 {
 880         int i, index = 0;
 881
 882         mci->ce_mc++;
 883
 884         if (!enable_per_layer_report) {
 885                 mci->ce_noinfo_count++;
 886                 return;
 887         }
 888
 889         for (i = 0; i < mci->n_layers; i++) {
 890                 if (pos[i] < 0)
 891                         break;
 892                 index += pos[i];
 893                 mci->ce_per_layer[i][index]++;
 894
 895                 if (i < mci->n_layers - 1)
 896                         index *= mci->layers[i + 1].size;
 897         }
 898 }
 899
 900 static void edac_inc_ue_error(struct mem_ctl_info *mci,
 901                                     bool enable_per_layer_report,
 902                                     const int pos[EDAC_MAX_LAYERS])
 903 {
 904         int i, index = 0;
 905
 906         mci->ue_mc++;
 907
 908         if (!enable_per_layer_report) {
 909                 mci->ce_noinfo_count++;
 910                 return;
 911         }
 912
 913         for (i = 0; i < mci->n_layers; i++) {
 914                 if (pos[i] < 0)
 915                         break;
 916                 index += pos[i];
 917                 mci->ue_per_layer[i][index]++;
 918
 919                 if (i < mci->n_layers - 1)
 920                         index *= mci->layers[i + 1].size;
 921         }
 922 }
 923
 924 static void edac_ce_error(struct mem_ctl_info *mci,
 925                           const int pos[EDAC_MAX_LAYERS],
 926                           const char *msg,
 927                           const char *location,
 928                           const char *label,
 929                           const char *detail,
 930                           const char *other_detail,
 931                           const bool enable_per_layer_report,
 932                           const unsigned long page_frame_number,
 933                           const unsigned long offset_in_page,
 934                           long grain)
 935 {
 936         unsigned long remapped_page;
 937
 938         if (edac_mc_get_log_ce()) {
 939                 if (other_detail && *other_detail)
 940                         edac_mc_printk(mci, KERN_WARNING,
 941                                        "CE %s on %s (%s %s - %s)\n",
 942                                        msg, label, location,
 943                                        detail, other_detail);
 944                 else
 945                         edac_mc_printk(mci, KERN_WARNING,
 946                                        "CE %s on %s (%s %s)\n",
 947                                        msg, label, location,
 948                                        detail);
 949         }
 950         edac_inc_ce_error(mci, enable_per_layer_report, pos);
 951
 952         if (mci->scrub_mode & SCRUB_SW_SRC) {
 953                 /*
 954                         * Some memory controllers (called MCs below) can remap
 955                         * memory so that it is still available at a different
 956                         * address when PCI devices map into memory.
 957                         * MC's that can't do this, lose the memory where PCI
 958                         * devices are mapped. This mapping is MC-dependent
 959                         * and so we call back into the MC driver for it to
 960                         * map the MC page to a physical (CPU) page which can
 961                         * then be mapped to a virtual page - which can then
 962                         * be scrubbed.
 963                         */
 964                 remapped_page = mci->ctl_page_to_phys ?
 965                         mci->ctl_page_to_phys(mci, page_frame_number) :
 966                         page_frame_number;
 967
 968                 edac_mc_scrub_block(remapped_page,
 969                                         offset_in_page, grain);
 970         }
 971 }
 972
 973 static void edac_ue_error(struct mem_ctl_info *mci,
 974                           const int pos[EDAC_MAX_LAYERS],
 975                           const char *msg,
 976                           const char *location,
 977                           const char *label,
 978                           const char *detail,
 979                           const char *other_detail,
 980                           const bool enable_per_layer_report)
 981 {
 982         if (edac_mc_get_log_ue()) {
 983                 if (other_detail && *other_detail)
 984                         edac_mc_printk(mci, KERN_WARNING,
 985                                        "UE %s on %s (%s %s - %s)\n",
 986                                        msg, label, location, detail,
 987                                        other_detail);
 988                 else
 989                         edac_mc_printk(mci, KERN_WARNING,
 990                                        "UE %s on %s (%s %s)\n",
 991                                        msg, label, location, detail);
 992         }
 993
 994         if (edac_mc_get_panic_on_ue()) {
 995                 if (other_detail && *other_detail)
 996                         panic("UE %s on %s (%s%s - %s)\n",
 997                               msg, label, location, detail, other_detail);
 998                 else
 999                         panic("UE %s on %s (%s%s)\n",
1000                               msg, label, location, detail);
1001         }
1002
1003         edac_inc_ue_error(mci, enable_per_layer_report, pos);
1004 }
1005
1006 #define OTHER_LABEL " or "
1007
1008 /**
1009  * edac_mc_handle_error - reports a memory event to userspace
1010  *
1011  * @type:               severity of the error (CE/UE/Fatal)
1012  * @mci:                a struct mem_ctl_info pointer
1013  * @page_frame_number:  mem page where the error occurred
1014  * @offset_in_page:     offset of the error inside the page
1015  * @syndrome:           ECC syndrome
1016  * @top_layer:          Memory layer[0] position
1017  * @mid_layer:          Memory layer[1] position
1018  * @low_layer:          Memory layer[2] position
1019  * @msg:                Message meaningful to the end users that
1020  *                      explains the event
1021  * @other_detail:       Technical details about the event that
1022  *                      may help hardware manufacturers and
1023  *                      EDAC developers to analyse the event
1024  * @arch_log:           Architecture-specific struct that can
1025  *                      be used to add extended information to the
1026  *                      tracepoint, like dumping MCE registers.
1027  */
1028 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1029                           struct mem_ctl_info *mci,
1030                           const unsigned long page_frame_number,
1031                           const unsigned long offset_in_page,
1032                           const unsigned long syndrome,
1033                           const int top_layer,
1034                           const int mid_layer,
1035                           const int low_layer,
1036                           const char *msg,
1037                           const char *other_detail,
1038                           const void *arch_log)
1039 {
1040         /* FIXME: too much for stack: move it to some pre-alocated area */
1041         char detail[80], location[80];
1042         char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1043         char *p;
1044         int row = -1, chan = -1;
1045         int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1046         int i;
1047         long grain;
1048         bool enable_per_layer_report = false;
1049         u16 error_count;        /* FIXME: make it a parameter */
1050         u8 grain_bits;
1051
1052         debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1053
1054         /*
1055          * Check if the event report is consistent and if the memory
1056          * location is known. If it is known, enable_per_layer_report will be
1057          * true, the DIMM(s) label info will be filled and the per-layer
1058          * error counters will be incremented.
1059          */
1060         for (i = 0; i < mci->n_layers; i++) {
1061                 if (pos[i] >= (int)mci->layers[i].size) {
1062                         if (type == HW_EVENT_ERR_CORRECTED)
1063                                 p = "CE";
1064                         else
1065                                 p = "UE";
1066
1067                         edac_mc_printk(mci, KERN_ERR,
1068                                        "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1069                                        edac_layer_name[mci->layers[i].type],
1070                                        pos[i], mci->layers[i].size);
1071                         /*
1072                          * Instead of just returning it, let's use what's
1073                          * known about the error. The increment routines and
1074                          * the DIMM filter logic will do the right thing by
1075                          * pointing the likely damaged DIMMs.
1076                          */
1077                         pos[i] = -1;
1078                 }
1079                 if (pos[i] >= 0)
1080                         enable_per_layer_report = true;
1081         }
1082
1083         /*
1084          * Get the dimm label/grain that applies to the match criteria.
1085          * As the error algorithm may not be able to point to just one memory
1086          * stick, the logic here will get all possible labels that could
1087          * pottentially be affected by the error.
1088          * On FB-DIMM memory controllers, for uncorrected errors, it is common
1089          * to have only the MC channel and the MC dimm (also called "branch")
1090          * but the channel is not known, as the memory is arranged in pairs,
1091          * where each memory belongs to a separate channel within the same
1092          * branch.
1093          */
1094         grain = 0;
1095         p = label;
1096         *p = '\0';
1097         for (i = 0; i < mci->tot_dimms; i++) {
1098                 struct dimm_info *dimm = mci->dimms[i];
1099
1100                 if (top_layer >= 0 && top_layer != dimm->location[0])
1101                         continue;
1102                 if (mid_layer >= 0 && mid_layer != dimm->location[1])
1103                         continue;
1104                 if (low_layer >= 0 && low_layer != dimm->location[2])
1105                         continue;
1106
1107                 /* get the max grain, over the error match range */
1108                 if (dimm->grain > grain)
1109                         grain = dimm->grain;
1110
1111                 /*
1112                  * If the error is memory-controller wide, there's no need to
1113                  * seek for the affected DIMMs because the whole
1114                  * channel/memory controller/...  may be affected.
1115                  * Also, don't show errors for empty DIMM slots.
1116                  */
1117                 if (enable_per_layer_report && dimm->nr_pages) {
1118                         if (p != label) {
1119                                 strcpy(p, OTHER_LABEL);
1120                                 p += strlen(OTHER_LABEL);
1121                         }
1122                         strcpy(p, dimm->label);
1123                         p += strlen(p);
1124                         *p = '\0';
1125
1126                         /*
1127                          * get csrow/channel of the DIMM, in order to allow
1128                          * incrementing the compat API counters
1129                          */
1130                         debugf4("%s: %s csrows map: (%d,%d)\n",
1131                                 __func__,
1132                                 mci->mem_is_per_rank ? "rank" : "dimm",
1133                                 dimm->csrow, dimm->cschannel);
1134
1135                         if (row == -1)
1136                                 row = dimm->csrow;
1137                         else if (row >= 0 && row != dimm->csrow)
1138                                 row = -2;
1139
1140                         if (chan == -1)
1141                                 chan = dimm->cschannel;
1142                         else if (chan >= 0 && chan != dimm->cschannel)
1143                                 chan = -2;
1144                 }
1145         }
1146
1147         if (!enable_per_layer_report) {
1148                 strcpy(label, "any memory");
1149         } else {
1150                 debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1151                         __func__, row, chan);
1152                 if (p == label)
1153                         strcpy(label, "unknown memory");
1154                 if (type == HW_EVENT_ERR_CORRECTED) {
1155                         if (row >= 0) {
1156                                 mci->csrows[row]->ce_count++;
1157                                 if (chan >= 0)
1158                                         mci->csrows[row]->channels[chan]->ce_count++;
1159                         }
1160                 } else
1161                         if (row >= 0)
1162                                 mci->csrows[row]->ue_count++;
1163         }
1164
1165         /* Fill the RAM location data */
1166         p = location;
1167         for (i = 0; i < mci->n_layers; i++) {
1168                 if (pos[i] < 0)
1169                         continue;
1170
1171                 p += sprintf(p, "%s:%d ",
1172                              edac_layer_name[mci->layers[i].type],
1173                              pos[i]);
1174         }
1175         if (p > location)
1176                 *(p - 1) = '\0';
1177
1178         /* Report the error via the trace interface */
1179
1180         error_count = 1;        /* FIXME: allow change it */
1181         grain_bits = fls_long(grain) + 1;
1182         trace_mc_event(type, msg, label, error_count,
1183                        mci->mc_idx, top_layer, mid_layer, low_layer,
1184                        PAGES_TO_MiB(page_frame_number) | offset_in_page,
1185                        grain_bits, syndrome, other_detail);
1186
1187         /* Memory type dependent details about the error */
1188         if (type == HW_EVENT_ERR_CORRECTED) {
1189                 snprintf(detail, sizeof(detail),
1190                         "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1191                         page_frame_number, offset_in_page,
1192                         grain, syndrome);
1193                 edac_ce_error(mci, pos, msg, location, label, detail,
1194                               other_detail, enable_per_layer_report,
1195                               page_frame_number, offset_in_page, grain);
1196         } else {
1197                 snprintf(detail, sizeof(detail),
1198                         "page:0x%lx offset:0x%lx grain:%ld",
1199                         page_frame_number, offset_in_page, grain);
1200
1201                 edac_ue_error(mci, pos, msg, location, label, detail,
1202                               other_detail, enable_per_layer_report);
1203         }
1204 }
1205 EXPORT_SYMBOL_GPL(edac_mc_handle_error);