Merge branch 'pm-domains'
[cascardo/linux.git] / arch / powerpc / platforms / powernv / opal-hmi.c
1 /*
2  * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Copyright 2014 IBM Corporation
18  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
19  */
20
21 #undef DEBUG
22
23 #include <linux/kernel.h>
24 #include <linux/init.h>
25 #include <linux/of.h>
26 #include <linux/mm.h>
27 #include <linux/slab.h>
28
29 #include <asm/opal.h>
30 #include <asm/cputable.h>
31 #include <asm/machdep.h>
32
33 static int opal_hmi_handler_nb_init;
34 struct OpalHmiEvtNode {
35         struct list_head list;
36         struct OpalHMIEvent hmi_evt;
37 };
38
39 struct xstop_reason {
40         uint32_t xstop_reason;
41         const char *unit_failed;
42         const char *description;
43 };
44
/* Taken (irqsave) around every modification of opal_hmi_evt_list. */
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
/* HMI events copied by the notifier and awaiting the work handler. */
static LIST_HEAD(opal_hmi_evt_list);
47
48 static void print_core_checkstop_reason(const char *level,
49                                         struct OpalHMIEvent *hmi_evt)
50 {
51         int i;
52         static const struct xstop_reason xstop_reason[] = {
53                 { CORE_CHECKSTOP_IFU_REGFILE, "IFU",
54                                 "RegFile core check stop" },
55                 { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
56                 { CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
57                                 "Core checkstop during recovery" },
58                 { CORE_CHECKSTOP_ISU_REGFILE, "ISU",
59                                 "RegFile core check stop (mapper error)" },
60                 { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
61                 { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
62                 { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
63                 { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
64                                 "Recovery in maintenance mode" },
65                 { CORE_CHECKSTOP_LSU_REGFILE, "LSU",
66                                 "RegFile core check stop" },
67                 { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
68                                 "Forward Progress Error" },
69                 { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
70                 { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
71                 { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
72                                 "Hypervisor Resource error - core check stop" },
73                 { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
74                                 "Hang Recovery Failed (core check stop)" },
75                 { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
76                                 "Ambiguous Hang Detected (unknown source)" },
77                 { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
78                                 "Debug Trigger Error inject" },
79                 { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
80                                 "Hypervisor check stop via SPRC/SPRD" },
81         };
82
83         /* Validity check */
84         if (!hmi_evt->u.xstop_error.xstop_reason) {
85                 printk("%s      Unknown Core check stop.\n", level);
86                 return;
87         }
88
89         printk("%s      CPU PIR: %08x\n", level,
90                         be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
91         for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
92                 if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
93                                         xstop_reason[i].xstop_reason)
94                         printk("%s      [Unit: %-3s] %s\n", level,
95                                         xstop_reason[i].unit_failed,
96                                         xstop_reason[i].description);
97 }
98
99 static void print_nx_checkstop_reason(const char *level,
100                                         struct OpalHMIEvent *hmi_evt)
101 {
102         int i;
103         static const struct xstop_reason xstop_reason[] = {
104                 { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
105                                         "SHM invalid state error" },
106                 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
107                                         "DMA invalid state error bit 15" },
108                 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
109                                         "DMA invalid state error bit 16" },
110                 { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
111                                         "Channel 0 invalid state error" },
112                 { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
113                                         "Channel 1 invalid state error" },
114                 { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
115                                         "Channel 2 invalid state error" },
116                 { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
117                                         "Channel 3 invalid state error" },
118                 { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
119                                         "Channel 4 invalid state error" },
120                 { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
121                                         "Channel 5 invalid state error" },
122                 { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
123                                         "Channel 6 invalid state error" },
124                 { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
125                                         "Channel 7 invalid state error" },
126                 { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
127                                         "UE error on CRB(CSB address, CCB)" },
128                 { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
129                                         "SUE error on CRB(CSB address, CCB)" },
130                 { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
131                 "CRB Kill ISN received while holding ISN with UE error" },
132         };
133
134         /* Validity check */
135         if (!hmi_evt->u.xstop_error.xstop_reason) {
136                 printk("%s      Unknown NX check stop.\n", level);
137                 return;
138         }
139
140         printk("%s      NX checkstop on CHIP ID: %x\n", level,
141                         be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
142         for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
143                 if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
144                                         xstop_reason[i].xstop_reason)
145                         printk("%s      [Unit: %-3s] %s\n", level,
146                                         xstop_reason[i].unit_failed,
147                                         xstop_reason[i].description);
148 }
149
150 static void print_checkstop_reason(const char *level,
151                                         struct OpalHMIEvent *hmi_evt)
152 {
153         uint8_t type = hmi_evt->u.xstop_error.xstop_type;
154         switch (type) {
155         case CHECKSTOP_TYPE_CORE:
156                 print_core_checkstop_reason(level, hmi_evt);
157                 break;
158         case CHECKSTOP_TYPE_NX:
159                 print_nx_checkstop_reason(level, hmi_evt);
160                 break;
161         default:
162                 printk("%s      Unknown Malfunction Alert of type %d\n",
163                        level, type);
164                 break;
165         }
166 }
167
168 static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
169 {
170         const char *level, *sevstr, *error_info;
171         static const char *hmi_error_types[] = {
172                 "Malfunction Alert",
173                 "Processor Recovery done",
174                 "Processor recovery occurred again",
175                 "Processor recovery occurred for masked error",
176                 "Timer facility experienced an error",
177                 "TFMR SPR is corrupted",
178                 "UPS (Uniterrupted Power System) Overflow indication",
179                 "An XSCOM operation failure",
180                 "An XSCOM operation completed",
181                 "SCOM has set a reserved FIR bit to cause recovery",
182                 "Debug trigger has set a reserved FIR bit to cause recovery",
183                 "A hypervisor resource error occurred"
184         };
185
186         /* Print things out */
187         if (hmi_evt->version < OpalHMIEvt_V1) {
188                 pr_err("HMI Interrupt, Unknown event version %d !\n",
189                         hmi_evt->version);
190                 return;
191         }
192         switch (hmi_evt->severity) {
193         case OpalHMI_SEV_NO_ERROR:
194                 level = KERN_INFO;
195                 sevstr = "Harmless";
196                 break;
197         case OpalHMI_SEV_WARNING:
198                 level = KERN_WARNING;
199                 sevstr = "";
200                 break;
201         case OpalHMI_SEV_ERROR_SYNC:
202                 level = KERN_ERR;
203                 sevstr = "Severe";
204                 break;
205         case OpalHMI_SEV_FATAL:
206         default:
207                 level = KERN_ERR;
208                 sevstr = "Fatal";
209                 break;
210         }
211
212         printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
213                 level, sevstr,
214                 hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
215                 "Recovered" : "Not recovered");
216         error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
217                         hmi_error_types[hmi_evt->type]
218                         : "Unknown";
219         printk("%s Error detail: %s\n", level, error_info);
220         printk("%s      HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
221         if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
222                 (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
223                 printk("%s      TFMR: %016llx\n", level,
224                                                 be64_to_cpu(hmi_evt->tfmr));
225
226         if (hmi_evt->version < OpalHMIEvt_V2)
227                 return;
228
229         /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
230         if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
231                 print_checkstop_reason(level, hmi_evt);
232 }
233
234 static void hmi_event_handler(struct work_struct *work)
235 {
236         unsigned long flags;
237         struct OpalHMIEvent *hmi_evt;
238         struct OpalHmiEvtNode *msg_node;
239         uint8_t disposition;
240         struct opal_msg msg;
241         int unrecoverable = 0;
242
243         spin_lock_irqsave(&opal_hmi_evt_lock, flags);
244         while (!list_empty(&opal_hmi_evt_list)) {
245                 msg_node = list_entry(opal_hmi_evt_list.next,
246                                            struct OpalHmiEvtNode, list);
247                 list_del(&msg_node->list);
248                 spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
249
250                 hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
251                 print_hmi_event_info(hmi_evt);
252                 disposition = hmi_evt->disposition;
253                 kfree(msg_node);
254
255                 /*
256                  * Check if HMI event has been recovered or not. If not
257                  * then kernel can't continue, we need to panic.
258                  * But before we do that, display all the HMI event
259                  * available on the list and set unrecoverable flag to 1.
260                  */
261                 if (disposition != OpalHMI_DISPOSITION_RECOVERED)
262                         unrecoverable = 1;
263
264                 spin_lock_irqsave(&opal_hmi_evt_lock, flags);
265         }
266         spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
267
268         if (unrecoverable) {
269                 int ret;
270
271                 /* Pull all HMI events from OPAL before we panic. */
272                 while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
273                         u32 type;
274
275                         type = be32_to_cpu(msg.msg_type);
276
277                         /* skip if not HMI event */
278                         if (type != OPAL_MSG_HMI_EVT)
279                                 continue;
280
281                         /* HMI event info starts from param[0] */
282                         hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
283                         print_hmi_event_info(hmi_evt);
284                 }
285
286                 /*
287                  * Unrecoverable HMI exception. We need to inform BMC/OCC
288                  * about this error so that it can collect relevant data
289                  * for error analysis before rebooting.
290                  */
291                 ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
292                         "Unrecoverable HMI exception");
293                 if (ret == OPAL_UNSUPPORTED) {
294                         pr_emerg("Reboot type %d not supported\n",
295                                                 OPAL_REBOOT_PLATFORM_ERROR);
296                 }
297
298                 /*
299                  * Fall through and panic if opal_cec_reboot2() returns
300                  * OPAL_UNSUPPORTED.
301                  */
302                 panic("Unrecoverable HMI exception");
303         }
304 }
305
306 static DECLARE_WORK(hmi_event_work, hmi_event_handler);
307 /*
308  * opal_handle_hmi_event - notifier handler that queues up HMI events
309  * to be preocessed later.
310  */
311 static int opal_handle_hmi_event(struct notifier_block *nb,
312                           unsigned long msg_type, void *msg)
313 {
314         unsigned long flags;
315         struct OpalHMIEvent *hmi_evt;
316         struct opal_msg *hmi_msg = msg;
317         struct OpalHmiEvtNode *msg_node;
318
319         /* Sanity Checks */
320         if (msg_type != OPAL_MSG_HMI_EVT)
321                 return 0;
322
323         /* HMI event info starts from param[0] */
324         hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
325
326         /* Delay the logging of HMI events to workqueue. */
327         msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
328         if (!msg_node) {
329                 pr_err("HMI: out of memory, Opal message event not handled\n");
330                 return -ENOMEM;
331         }
332         memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
333
334         spin_lock_irqsave(&opal_hmi_evt_lock, flags);
335         list_add(&msg_node->list, &opal_hmi_evt_list);
336         spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
337
338         schedule_work(&hmi_event_work);
339         return 0;
340 }
341
342 static struct notifier_block opal_hmi_handler_nb = {
343         .notifier_call  = opal_handle_hmi_event,
344         .next           = NULL,
345         .priority       = 0,
346 };
347
348 int __init opal_hmi_handler_init(void)
349 {
350         int ret;
351
352         if (!opal_hmi_handler_nb_init) {
353                 ret = opal_message_notifier_register(
354                                 OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb);
355                 if (ret) {
356                         pr_err("%s: Can't register OPAL event notifier (%d)\n",
357                                __func__, ret);
358                         return ret;
359                 }
360                 opal_hmi_handler_nb_init = 1;
361         }
362         return 0;
363 }