1 /*
2    drbd_nl.c
3
4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10    drbd is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2, or (at your option)
13    any later version.
14
15    drbd is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with drbd; see the file COPYING.  If not, write to
22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24  */
25
26 #define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
27
28 #include <linux/module.h>
29 #include <linux/drbd.h>
30 #include <linux/in.h>
31 #include <linux/fs.h>
32 #include <linux/file.h>
33 #include <linux/slab.h>
34 #include <linux/blkpg.h>
35 #include <linux/cpumask.h>
36 #include "drbd_int.h"
37 #include "drbd_protocol.h"
38 #include "drbd_req.h"
39 #include "drbd_state_change.h"
40 #include <asm/unaligned.h>
41 #include <linux/drbd_limits.h>
42 #include <linux/kthread.h>
43
44 #include <net/genetlink.h>
45
46 /* .doit */
47 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
48 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
49
50 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
51 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
52
53 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
54 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
55 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
56
57 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
58 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
59 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
60 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
61 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
62 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
63 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
64 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
65 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
66 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
67 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
68 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
69 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
70 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
71 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
72 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
73 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
74 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
75 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
76 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
77 /* .dumpit */
78 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
79 int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
80 int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
81 int drbd_adm_dump_devices_done(struct netlink_callback *cb);
82 int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
83 int drbd_adm_dump_connections_done(struct netlink_callback *cb);
84 int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
85 int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
86 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
87
88 #include <linux/drbd_genl_api.h>
89 #include "drbd_nla.h"
90 #include <linux/genl_magic_func.h>
91
92 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
93 static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
94
95 DEFINE_MUTEX(notification_mutex);
96
97 /* used by blkdev_get_by_path() to claim our meta data device(s) */
98 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
99
100 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
101 {
102         genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
103         if (genlmsg_reply(skb, info))
104                 pr_err("error sending genl reply\n");
105 }
106
107 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
108  * reason it could fail is lack of space in the skb, and there are 4k available. */
109 static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
110 {
111         struct nlattr *nla;
112         int err = -EMSGSIZE;
113
114         if (!info || !info[0])
115                 return 0;
116
117         nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
118         if (!nla)
119                 return err;
120
121         err = nla_put_string(skb, T_info_text, info);
122         if (err) {
123                 nla_nest_cancel(skb, nla);
124                 return err;
125         } else
126                 nla_nest_end(skb, nla);
127         return 0;
128 }
129
130 /* This would be a good candidate for a "pre_doit" hook,
131  * and per-family private info->pointers.
132  * But we need to stay compatible with older kernels.
133  * If it returns successfully, adm_ctx members are valid.
134  *
135  * At this point, we still rely on the global genl_lock().
136  * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
137  * to add additional synchronization against object destruction/modification.
138  */
139 #define DRBD_ADM_NEED_MINOR     1
140 #define DRBD_ADM_NEED_RESOURCE  2
141 #define DRBD_ADM_NEED_CONNECTION 4
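
/*
 * A minimal sketch of the pattern the .doit handlers declared above follow:
 * they bracket the actual work with drbd_adm_prepare()/drbd_adm_finish(),
 * roughly as drbd_adm_set_role() does further below:
 *
 *	struct drbd_config_context adm_ctx;
 *	enum drbd_ret_code retcode;
 *
 *	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *	if (!adm_ctx.reply_skb)
 *		return retcode;
 *	if (retcode != NO_ERROR)
 *		goto out;
 *	... validate attributes and do the actual work, setting retcode ...
 * out:
 *	drbd_adm_finish(&adm_ctx, info, retcode);
 *	return 0;
 */
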
142 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
143         struct sk_buff *skb, struct genl_info *info, unsigned flags)
144 {
145         struct drbd_genlmsghdr *d_in = info->userhdr;
146         const u8 cmd = info->genlhdr->cmd;
147         int err;
148
149         memset(adm_ctx, 0, sizeof(*adm_ctx));
150
151         /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
152         if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
153                return -EPERM;
154
155         adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
156         if (!adm_ctx->reply_skb) {
157                 err = -ENOMEM;
158                 goto fail;
159         }
160
161         adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
162                                         info, &drbd_genl_family, 0, cmd);
163         /* a put of a few bytes into a fresh skb of >= 4k will always succeed,
164          * but check anyway */
165         if (!adm_ctx->reply_dh) {
166                 err = -ENOMEM;
167                 goto fail;
168         }
169
170         adm_ctx->reply_dh->minor = d_in->minor;
171         adm_ctx->reply_dh->ret_code = NO_ERROR;
172
173         adm_ctx->volume = VOLUME_UNSPECIFIED;
174         if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
175                 struct nlattr *nla;
176                 /* parse and validate only */
177                 err = drbd_cfg_context_from_attrs(NULL, info);
178                 if (err)
179                         goto fail;
180
181                 /* It was present, and valid,
182                  * copy it over to the reply skb. */
183                 err = nla_put_nohdr(adm_ctx->reply_skb,
184                                 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
185                                 info->attrs[DRBD_NLA_CFG_CONTEXT]);
186                 if (err)
187                         goto fail;
188
189                 /* and assign stuff to the adm_ctx */
190                 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
191                 if (nla)
192                         adm_ctx->volume = nla_get_u32(nla);
193                 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
194                 if (nla)
195                         adm_ctx->resource_name = nla_data(nla);
196                 adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
197                 adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
198                 if ((adm_ctx->my_addr &&
199                      nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
200                     (adm_ctx->peer_addr &&
201                      nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
202                         err = -EINVAL;
203                         goto fail;
204                 }
205         }
206
207         adm_ctx->minor = d_in->minor;
208         adm_ctx->device = minor_to_device(d_in->minor);
209
210         /* We are protected by the global genl_lock().
211          * But we may explicitly drop it/retake it in drbd_adm_set_role(),
212          * so make sure this object stays around. */
213         if (adm_ctx->device)
214                 kref_get(&adm_ctx->device->kref);
215
216         if (adm_ctx->resource_name) {
217                 adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
218         }
219
220         if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
221                 drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
222                 return ERR_MINOR_INVALID;
223         }
224         if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
225                 drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
226                 if (adm_ctx->resource_name)
227                         return ERR_RES_NOT_KNOWN;
228                 return ERR_INVALID_REQUEST;
229         }
230
231         if (flags & DRBD_ADM_NEED_CONNECTION) {
232                 if (adm_ctx->resource) {
233                         drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
234                         return ERR_INVALID_REQUEST;
235                 }
236                 if (adm_ctx->device) {
237                         drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
238                         return ERR_INVALID_REQUEST;
239                 }
240                 if (adm_ctx->my_addr && adm_ctx->peer_addr)
241                         adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
242                                                           nla_len(adm_ctx->my_addr),
243                                                           nla_data(adm_ctx->peer_addr),
244                                                           nla_len(adm_ctx->peer_addr));
245                 if (!adm_ctx->connection) {
246                         drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
247                         return ERR_INVALID_REQUEST;
248                 }
249         }
250
251         /* some more paranoia, if the request was over-determined */
252         if (adm_ctx->device && adm_ctx->resource &&
253             adm_ctx->device->resource != adm_ctx->resource) {
254                 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
255                                 adm_ctx->minor, adm_ctx->resource->name,
256                                 adm_ctx->device->resource->name);
257                 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
258                 return ERR_INVALID_REQUEST;
259         }
260         if (adm_ctx->device &&
261             adm_ctx->volume != VOLUME_UNSPECIFIED &&
262             adm_ctx->volume != adm_ctx->device->vnr) {
263                 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
264                                 adm_ctx->minor, adm_ctx->volume,
265                                 adm_ctx->device->vnr,
266                                 adm_ctx->device->resource->name);
267                 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
268                 return ERR_INVALID_REQUEST;
269         }
270
271         /* still, provide adm_ctx->resource always, if possible. */
272         if (!adm_ctx->resource) {
273                 adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
274                         : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
275                 if (adm_ctx->resource)
276                         kref_get(&adm_ctx->resource->kref);
277         }
278
279         return NO_ERROR;
280
281 fail:
282         nlmsg_free(adm_ctx->reply_skb);
283         adm_ctx->reply_skb = NULL;
284         return err;
285 }
286
287 static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
288         struct genl_info *info, int retcode)
289 {
290         if (adm_ctx->device) {
291                 kref_put(&adm_ctx->device->kref, drbd_destroy_device);
292                 adm_ctx->device = NULL;
293         }
294         if (adm_ctx->connection) {
295                 kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
296                 adm_ctx->connection = NULL;
297         }
298         if (adm_ctx->resource) {
299                 kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
300                 adm_ctx->resource = NULL;
301         }
302
303         if (!adm_ctx->reply_skb)
304                 return -ENOMEM;
305
306         adm_ctx->reply_dh->ret_code = retcode;
307         drbd_adm_send_reply(adm_ctx->reply_skb, info);
308         return 0;
309 }
310
311 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
312 {
313         char *afs;
314
315         /* FIXME: A future version will not allow this case. */
316         if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
317                 return;
318
319         switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
320         case AF_INET6:
321                 afs = "ipv6";
322                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
323                          &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
324                 break;
325         case AF_INET:
326                 afs = "ipv4";
327                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
328                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
329                 break;
330         default:
331                 afs = "ssocks";
332                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
333                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
334         }
335         snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
336 }
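
/*
 * For illustration, with a hypothetical IPv4 peer at 10.0.0.2, a helper
 * forked via drbd_khelper()/conn_khelper() below would see roughly this
 * environment (addresses permitting):
 *
 *	HOME=/
 *	TERM=linux
 *	PATH=/sbin:/usr/sbin:/bin:/usr/bin
 *	DRBD_PEER_AF=ipv4
 *	DRBD_PEER_ADDRESS=10.0.0.2
 */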
337
338 int drbd_khelper(struct drbd_device *device, char *cmd)
339 {
340         char *envp[] = { "HOME=/",
341                         "TERM=linux",
342                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
343                          (char[20]) { }, /* address family */
344                          (char[60]) { }, /* address */
345                         NULL };
346         char mb[14];
347         char *argv[] = {usermode_helper, cmd, mb, NULL };
348         struct drbd_connection *connection = first_peer_device(device)->connection;
349         struct sib_info sib;
350         int ret;
351
352         if (current == connection->worker.task)
353                 set_bit(CALLBACK_PENDING, &connection->flags);
354
355         snprintf(mb, 14, "minor-%d", device_to_minor(device));
356         setup_khelper_env(connection, envp);
357
358         /* The helper may take some time.
359          * write out any unsynced meta data changes now */
360         drbd_md_sync(device);
361
362         drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
363         sib.sib_reason = SIB_HELPER_PRE;
364         sib.helper_name = cmd;
365         drbd_bcast_event(device, &sib);
366         notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
367         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
368         if (ret)
369                 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
370                                 usermode_helper, cmd, mb,
371                                 (ret >> 8) & 0xff, ret);
372         else
373                 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
374                                 usermode_helper, cmd, mb,
375                                 (ret >> 8) & 0xff, ret);
376         sib.sib_reason = SIB_HELPER_POST;
377         sib.helper_exit_code = ret;
378         drbd_bcast_event(device, &sib);
379         notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
380
381         if (current == connection->worker.task)
382                 clear_bit(CALLBACK_PENDING, &connection->flags);
383
384         if (ret < 0) /* Ignore any ERRNOs we got. */
385                 ret = 0;
386
387         return ret;
388 }
389
390 enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
391 {
392         char *envp[] = { "HOME=/",
393                         "TERM=linux",
394                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
395                          (char[20]) { }, /* address family */
396                          (char[60]) { }, /* address */
397                         NULL };
398         char *resource_name = connection->resource->name;
399         char *argv[] = {usermode_helper, cmd, resource_name, NULL };
400         int ret;
401
402         setup_khelper_env(connection, envp);
403         conn_md_sync(connection);
404
405         drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
406         /* TODO: conn_bcast_event() ?? */
407         notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
408
409         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
410         if (ret)
411                 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
412                           usermode_helper, cmd, resource_name,
413                           (ret >> 8) & 0xff, ret);
414         else
415                 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
416                           usermode_helper, cmd, resource_name,
417                           (ret >> 8) & 0xff, ret);
418         /* TODO: conn_bcast_event() ?? */
419         notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
420
421         if (ret < 0) /* Ignore any ERRNOs we got. */
422                 ret = 0;
423
424         return ret;
425 }
426
427 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
428 {
429         enum drbd_fencing_p fp = FP_NOT_AVAIL;
430         struct drbd_peer_device *peer_device;
431         int vnr;
432
433         rcu_read_lock();
434         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
435                 struct drbd_device *device = peer_device->device;
436                 if (get_ldev_if_state(device, D_CONSISTENT)) {
437                         struct disk_conf *disk_conf =
438                                 rcu_dereference(peer_device->device->ldev->disk_conf);
439                         fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
440                         put_ldev(device);
441                 }
442         }
443         rcu_read_unlock();
444
445         return fp;
446 }
447
448 static bool resource_is_suspended(struct drbd_resource *resource)
449 {
450         return resource->susp || resource->susp_fen || resource->susp_nod;
451 }
452
453 bool conn_try_outdate_peer(struct drbd_connection *connection)
454 {
455         struct drbd_resource * const resource = connection->resource;
456         unsigned int connect_cnt;
457         union drbd_state mask = { };
458         union drbd_state val = { };
459         enum drbd_fencing_p fp;
460         char *ex_to_string;
461         int r;
462
463         spin_lock_irq(&resource->req_lock);
464         if (connection->cstate >= C_WF_REPORT_PARAMS) {
465                 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
466                 spin_unlock_irq(&resource->req_lock);
467                 return false;
468         }
469
470         connect_cnt = connection->connect_cnt;
471         spin_unlock_irq(&resource->req_lock);
472
473         fp = highest_fencing_policy(connection);
474         switch (fp) {
475         case FP_NOT_AVAIL:
476                 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
477                 spin_lock_irq(&resource->req_lock);
478                 if (connection->cstate < C_WF_REPORT_PARAMS) {
479                         _conn_request_state(connection,
480                                             (union drbd_state) { { .susp_fen = 1 } },
481                                             (union drbd_state) { { .susp_fen = 0 } },
482                                             CS_VERBOSE | CS_HARD | CS_DC_SUSP);
483                         /* We are no longer suspended due to the fencing policy.
484                          * We may still be suspended due to the on-no-data-accessible policy.
485                          * If that was OND_IO_ERROR, fail pending requests. */
486                         if (!resource_is_suspended(resource))
487                                 _tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
488                 }
489                 /* Else: in case we raced with a connection handshake,
490                  * let the handshake figure out if we maybe can RESEND,
491                  * and do not resume/fail pending requests here.
492                  * Worst case is we stay suspended for now, which may be
493                  * resolved by either re-establishing the replication link, or
494                  * the next link failure, or eventually the administrator.  */
495                 spin_unlock_irq(&resource->req_lock);
496                 return false;
497
498         case FP_DONT_CARE:
499                 return true;
500         default: ;
501         }
502
503         r = conn_khelper(connection, "fence-peer");
504
505         switch ((r>>8) & 0xff) {
506         case P_INCONSISTENT: /* peer is inconsistent */
507                 ex_to_string = "peer is inconsistent or worse";
508                 mask.pdsk = D_MASK;
509                 val.pdsk = D_INCONSISTENT;
510                 break;
511         case P_OUTDATED: /* peer got outdated, or was already outdated */
512                 ex_to_string = "peer was fenced";
513                 mask.pdsk = D_MASK;
514                 val.pdsk = D_OUTDATED;
515                 break;
516         case P_DOWN: /* peer was down */
517                 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
518                         /* we will(have) create(d) a new UUID anyways... */
519                         ex_to_string = "peer is unreachable, assumed to be dead";
520                         mask.pdsk = D_MASK;
521                         val.pdsk = D_OUTDATED;
522                 } else {
523                         ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
524                 }
525                 break;
526         case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
527                  * This is useful when an unconnected R_SECONDARY is asked to
528                  * become R_PRIMARY, but finds the other peer being active. */
529                 ex_to_string = "peer is active";
530                 drbd_warn(connection, "Peer is primary, outdating myself.\n");
531                 mask.disk = D_MASK;
532                 val.disk = D_OUTDATED;
533                 break;
534         case P_FENCING:
535                 /* THINK: do we need to handle this
536                  * like case 4, or more like case 5? */
537                 if (fp != FP_STONITH)
538                         drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
539                 ex_to_string = "peer was stonithed";
540                 mask.pdsk = D_MASK;
541                 val.pdsk = D_OUTDATED;
542                 break;
543         default:
544                 /* The script is broken ... */
545                 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
546                 return false; /* Eventually leave IO frozen */
547         }
548
549         drbd_info(connection, "fence-peer helper returned %d (%s)\n",
550                   (r>>8) & 0xff, ex_to_string);
551
552         /* Not using
553            conn_request_state(connection, mask, val, CS_VERBOSE);
554            here, because we might have been able to re-establish the connection
555            in the meantime. */
556         spin_lock_irq(&resource->req_lock);
557         if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
558                 if (connection->connect_cnt != connect_cnt)
559                         /* In case the connection was established and dropped
560                            while the fence-peer handler was running, ignore it */
561                         drbd_info(connection, "Ignoring fence-peer exit code\n");
562                 else
563                         _conn_request_state(connection, mask, val, CS_VERBOSE);
564         }
565         spin_unlock_irq(&resource->req_lock);
566
567         return conn_highest_pdsk(connection) <= D_OUTDATED;
568 }
569
570 static int _try_outdate_peer_async(void *data)
571 {
572         struct drbd_connection *connection = (struct drbd_connection *)data;
573
574         conn_try_outdate_peer(connection);
575
576         kref_put(&connection->kref, drbd_destroy_connection);
577         return 0;
578 }
579
580 void conn_try_outdate_peer_async(struct drbd_connection *connection)
581 {
582         struct task_struct *opa;
583
584         kref_get(&connection->kref);
585         /* We may just have force_sig()'ed this thread
586          * to get it out of some blocking network function.
587          * Clear signals; otherwise kthread_run(), which internally uses
588          * wait_for_completion_killable(), will mistake our pending signal
589          * for a new fatal signal and fail. */
590         flush_signals(current);
591         opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
592         if (IS_ERR(opa)) {
593                 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
594                 kref_put(&connection->kref, drbd_destroy_connection);
595         }
596 }
597
598 enum drbd_state_rv
599 drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
600 {
601         struct drbd_peer_device *const peer_device = first_peer_device(device);
602         struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
603         const int max_tries = 4;
604         enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
605         struct net_conf *nc;
606         int try = 0;
607         int forced = 0;
608         union drbd_state mask, val;
609
610         if (new_role == R_PRIMARY) {
611                 struct drbd_connection *connection;
612
613                 /* Detect dead peers as soon as possible.  */
614
615                 rcu_read_lock();
616                 for_each_connection(connection, device->resource)
617                         request_ping(connection);
618                 rcu_read_unlock();
619         }
620
621         mutex_lock(device->state_mutex);
622
623         mask.i = 0; mask.role = R_MASK;
624         val.i  = 0; val.role  = new_role;
625
626         while (try++ < max_tries) {
627                 rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);
628
629                 /* in case we first succeeded in outdating,
630                  * but now could suddenly establish a connection */
631                 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
632                         val.pdsk = 0;
633                         mask.pdsk = 0;
634                         continue;
635                 }
636
637                 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
638                     (device->state.disk < D_UP_TO_DATE &&
639                      device->state.disk >= D_INCONSISTENT)) {
640                         mask.disk = D_MASK;
641                         val.disk  = D_UP_TO_DATE;
642                         forced = 1;
643                         continue;
644                 }
645
646                 if (rv == SS_NO_UP_TO_DATE_DISK &&
647                     device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
648                         D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
649
650                         if (conn_try_outdate_peer(connection)) {
651                                 val.disk = D_UP_TO_DATE;
652                                 mask.disk = D_MASK;
653                         }
654                         continue;
655                 }
656
657                 if (rv == SS_NOTHING_TO_DO)
658                         goto out;
659                 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
660                         if (!conn_try_outdate_peer(connection) && force) {
661                                 drbd_warn(device, "Forced into split brain situation!\n");
662                                 mask.pdsk = D_MASK;
663                                 val.pdsk  = D_OUTDATED;
664
665                         }
666                         continue;
667                 }
668                 if (rv == SS_TWO_PRIMARIES) {
669                         /* Maybe the peer is detected as dead very soon...
670                            retry at most once more in this case. */
671                         int timeo;
672                         rcu_read_lock();
673                         nc = rcu_dereference(connection->net_conf);
674                         timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
675                         rcu_read_unlock();
676                         schedule_timeout_interruptible(timeo);
677                         if (try < max_tries)
678                                 try = max_tries - 1;
679                         continue;
680                 }
681                 if (rv < SS_SUCCESS) {
682                         rv = _drbd_request_state(device, mask, val,
683                                                 CS_VERBOSE + CS_WAIT_COMPLETE);
684                         if (rv < SS_SUCCESS)
685                                 goto out;
686                 }
687                 break;
688         }
689
690         if (rv < SS_SUCCESS)
691                 goto out;
692
693         if (forced)
694                 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
695
696         /* Wait until nothing is on the fly :) */
697         wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
698
699         /* FIXME also wait for all pending P_BARRIER_ACK? */
700
701         if (new_role == R_SECONDARY) {
702                 if (get_ldev(device)) {
703                         device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
704                         put_ldev(device);
705                 }
706         } else {
707                 mutex_lock(&device->resource->conf_update);
708                 nc = connection->net_conf;
709                 if (nc)
710                         nc->discard_my_data = 0; /* without copy; single bit op is atomic */
711                 mutex_unlock(&device->resource->conf_update);
712
713                 if (get_ldev(device)) {
714                         if (((device->state.conn < C_CONNECTED ||
715                                device->state.pdsk <= D_FAILED)
716                               && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
717                                 drbd_uuid_new_current(device);
718
719                         device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
720                         put_ldev(device);
721                 }
722         }
723
724         /* writeout of activity-log-covered areas of the bitmap
725          * to stable storage is already done in the after-state-change work */
726
727         if (device->state.conn >= C_WF_REPORT_PARAMS) {
728                 /* if this was forced, we should consider sync */
729                 if (forced)
730                         drbd_send_uuids(peer_device);
731                 drbd_send_current_state(peer_device);
732         }
733
734         drbd_md_sync(device);
735         set_disk_ro(device->vdisk, new_role == R_SECONDARY);
736         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
737 out:
738         mutex_unlock(device->state_mutex);
739         return rv;
740 }
741
742 static const char *from_attrs_err_to_txt(int err)
743 {
744         return  err == -ENOMSG ? "required attribute missing" :
745                 err == -EOPNOTSUPP ? "unknown mandatory attribute" :
746                 err == -EEXIST ? "can not change invariant setting" :
747                 "invalid attribute value";
748 }
749
750 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
751 {
752         struct drbd_config_context adm_ctx;
753         struct set_role_parms parms;
754         int err;
755         enum drbd_ret_code retcode;
756
757         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
758         if (!adm_ctx.reply_skb)
759                 return retcode;
760         if (retcode != NO_ERROR)
761                 goto out;
762
763         memset(&parms, 0, sizeof(parms));
764         if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
765                 err = set_role_parms_from_attrs(&parms, info);
766                 if (err) {
767                         retcode = ERR_MANDATORY_TAG;
768                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
769                         goto out;
770                 }
771         }
772         genl_unlock();
773         mutex_lock(&adm_ctx.resource->adm_mutex);
774
775         if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
776                 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
777         else
778                 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
779
780         mutex_unlock(&adm_ctx.resource->adm_mutex);
781         genl_lock();
782 out:
783         drbd_adm_finish(&adm_ctx, info, retcode);
784         return 0;
785 }
786
787 /* Initializes the md.*_offset members, so we are able to find
788  * the on disk meta data.
789  *
790  * We currently have two possible layouts:
791  * external:
792  *   |----------- md_size_sect ------------------|
793  *   [ 4k superblock ][ activity log ][  Bitmap  ]
794  *   | al_offset == 8 |
795  *   | bm_offset = al_offset + X      |
796  *  ==> bitmap sectors = md_size_sect - bm_offset
797  *
798  * internal:
799  *            |----------- md_size_sect ------------------|
800  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
801  *                        | al_offset < 0 |
802  *            | bm_offset = al_offset - Y |
803  *  ==> bitmap sectors = Y = al_offset - bm_offset
804  *
805  *  Activity log size used to be fixed 32kB,
806  *  but is about to become configurable.
807  */
808 static void drbd_md_set_sector_offsets(struct drbd_device *device,
809                                        struct drbd_backing_dev *bdev)
810 {
811         sector_t md_size_sect = 0;
812         unsigned int al_size_sect = bdev->md.al_size_4k * 8;
813
814         bdev->md.md_offset = drbd_md_ss(bdev);
815
816         switch (bdev->md.meta_dev_idx) {
817         default:
818                 /* v07 style fixed size indexed meta data */
819                 bdev->md.md_size_sect = MD_128MB_SECT;
820                 bdev->md.al_offset = MD_4kB_SECT;
821                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
822                 break;
823         case DRBD_MD_INDEX_FLEX_EXT:
824                 /* just occupy the full device; unit: sectors */
825                 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
826                 bdev->md.al_offset = MD_4kB_SECT;
827                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
828                 break;
829         case DRBD_MD_INDEX_INTERNAL:
830         case DRBD_MD_INDEX_FLEX_INT:
831                 /* al size is still fixed */
832                 bdev->md.al_offset = -al_size_sect;
833                 /* we need (slightly less than) ~ this much bitmap sectors: */
834                 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
835                 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
836                 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
837                 md_size_sect = ALIGN(md_size_sect, 8);
838
839                 /* plus the "drbd meta data super block",
840                  * and the activity log; */
841                 md_size_sect += MD_4kB_SECT + al_size_sect;
842
843                 bdev->md.md_size_sect = md_size_sect;
844                 /* bitmap offset is adjusted by 'super' block size */
845                 bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
846                 break;
847         }
848 }
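
/*
 * Worked sketch, assuming the historical fixed 32kB activity log mentioned
 * above: al_size_4k = 8, so al_size_sect = 8 * 8 = 64 sectors.  For the
 * default indexed layout that leaves
 *	bitmap sectors = md_size_sect - bm_offset
 *	               = MD_128MB_SECT - (MD_4kB_SECT + 64)
 * while for internal meta data md_size_sect is derived from the backing
 * device capacity and the bitmap starts at
 *	bm_offset = -md_size_sect + MD_4kB_SECT.
 */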
849
850 /* input size is expected to be in KB */
851 char *ppsize(char *buf, unsigned long long size)
852 {
853         /* Needs 9 bytes at max including trailing NUL:
854          * -1ULL ==> "16384 EB" */
855         static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
856         int base = 0;
857         while (size >= 10000 && base < sizeof(units)-1) {
858                 /* shift + round */
859                 size = (size >> 10) + !!(size & (1<<9));
860                 base++;
861         }
862         sprintf(buf, "%u %cB", (unsigned)size, units[base]);
863
864         return buf;
865 }
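
/*
 * Usage sketch (values chosen for illustration), with a caller-provided
 * buffer of at least 9 bytes:
 *
 *	char buf[10];
 *	ppsize(buf, 4);        -> "4 KB"
 *	ppsize(buf, 1048576);  -> "1024 MB"
 *
 * i.e. the unit is only bumped once the value would need five digits.
 */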
866
867 /* there is still a theoretical deadlock when called from receiver
868  * on a D_INCONSISTENT R_PRIMARY:
869  *  remote READ does inc_ap_bio, receiver would need to receive answer
870  *  packet from remote to dec_ap_bio again.
871  *  receiver receive_sizes(), comes here,
872  *  waits for ap_bio_cnt == 0. -> deadlock.
873  * but this cannot happen, actually, because:
874  *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
875  *  (not connected, or bad/no disk on peer):
876  *  see drbd_fail_request_early, ap_bio_cnt is zero.
877  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
878  *  peer may not initiate a resize.
879  */
880 /* Note these are not to be confused with
881  * drbd_adm_suspend_io/drbd_adm_resume_io,
882  * which are (sub) state changes triggered by admin (drbdsetup),
883  * and can be long lived.
884  * This changes a device->flag, is triggered by drbd internals,
885  * and should be short-lived. */
886 /* It needs to be a counter, since multiple threads might
887    independently suspend and resume IO. */
888 void drbd_suspend_io(struct drbd_device *device)
889 {
890         atomic_inc(&device->suspend_cnt);
891         if (drbd_suspended(device))
892                 return;
893         wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
894 }
895
896 void drbd_resume_io(struct drbd_device *device)
897 {
898         if (atomic_dec_and_test(&device->suspend_cnt))
899                 wake_up(&device->misc_wait);
900 }
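
/*
 * Typical pairing, as used by drbd_determine_dev_size() below (sketch):
 *
 *	drbd_suspend_io(device);
 *	... change on-disk meta data layout, no new application bios enter ...
 *	drbd_resume_io(device);
 *
 * Since suspend_cnt is a counter, nested or concurrent users only resume IO
 * when the last of them calls drbd_resume_io().
 */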
901
902 /**
903  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
904  * @device:     DRBD device.
905  *
906  * Returns an enum determine_dev_size; negative values indicate errors.
907  * You should call drbd_md_sync() after calling this function.
908  */
909 enum determine_dev_size
910 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
911 {
912         struct md_offsets_and_sizes {
913                 u64 last_agreed_sect;
914                 u64 md_offset;
915                 s32 al_offset;
916                 s32 bm_offset;
917                 u32 md_size_sect;
918
919                 u32 al_stripes;
920                 u32 al_stripe_size_4k;
921         } prev;
922         sector_t u_size, size;
923         struct drbd_md *md = &device->ldev->md;
924         char ppb[10];
925         void *buffer;
926
927         int md_moved, la_size_changed;
928         enum determine_dev_size rv = DS_UNCHANGED;
929
930         /* We may change the on-disk offsets of our meta data below.  Lock out
931          * anything that may cause meta data IO, to avoid acting on incomplete
932          * layout changes or scribbling over meta data that is in the process
933          * of being moved.
934          *
935          * Move is not exactly correct, btw, currently we have all our meta
936          * data in core memory, to "move" it we just write it all out, there
937          * are no reads. */
938         drbd_suspend_io(device);
939         buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
940         if (!buffer) {
941                 drbd_resume_io(device);
942                 return DS_ERROR;
943         }
944
945         /* remember current offset and sizes */
946         prev.last_agreed_sect = md->la_size_sect;
947         prev.md_offset = md->md_offset;
948         prev.al_offset = md->al_offset;
949         prev.bm_offset = md->bm_offset;
950         prev.md_size_sect = md->md_size_sect;
951         prev.al_stripes = md->al_stripes;
952         prev.al_stripe_size_4k = md->al_stripe_size_4k;
953
954         if (rs) {
955                 /* rs is non NULL if we should change the AL layout only */
956                 md->al_stripes = rs->al_stripes;
957                 md->al_stripe_size_4k = rs->al_stripe_size / 4;
958                 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
959         }
960
961         drbd_md_set_sector_offsets(device, device->ldev);
962
963         rcu_read_lock();
964         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
965         rcu_read_unlock();
966         size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
967
968         if (size < prev.last_agreed_sect) {
969                 if (rs && u_size == 0) {
970                         /* Remove "rs &&" later. This check should always be active, but
971                            right now the receiver expects the permissive behavior */
972                         drbd_warn(device, "Implicit shrink not allowed. "
973                                  "Use --size=%llus for explicit shrink.\n",
974                                  (unsigned long long)size);
975                         rv = DS_ERROR_SHRINK;
976                 }
977                 if (u_size > size)
978                         rv = DS_ERROR_SPACE_MD;
979                 if (rv != DS_UNCHANGED)
980                         goto err_out;
981         }
982
983         if (drbd_get_capacity(device->this_bdev) != size ||
984             drbd_bm_capacity(device) != size) {
985                 int err;
986                 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
987                 if (unlikely(err)) {
988                         /* currently there is only one error: ENOMEM! */
989                         size = drbd_bm_capacity(device);
990                         if (size == 0) {
991                                 drbd_err(device, "OUT OF MEMORY! "
992                                     "Could not allocate bitmap!\n");
993                         } else {
994                                 drbd_err(device, "BM resizing failed. "
995                                     "Leaving size unchanged\n");
996                         }
997                         rv = DS_ERROR;
998                 }
999                 /* racy, see comments above. */
1000                 drbd_set_my_capacity(device, size);
1001                 md->la_size_sect = size;
1002                 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
1003                      (unsigned long long)size>>1);
1004         }
1005         if (rv <= DS_ERROR)
1006                 goto err_out;
1007
1008         la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
1009
1010         md_moved = prev.md_offset    != md->md_offset
1011                 || prev.md_size_sect != md->md_size_sect;
1012
1013         if (la_size_changed || md_moved || rs) {
1014                 u32 prev_flags;
1015
1016                 /* We do some synchronous IO below, which may take some time.
1017                  * Clear the timer, to avoid scary "timer expired!" messages,
1018                  * "Superblock" is written out at least twice below, anyways. */
1019                 del_timer(&device->md_sync_timer);
1020
1021                 /* We won't change the "al-extents" setting, we just may need
1022                  * to move the on-disk location of the activity log ringbuffer.
1023                  * Lock for transaction is good enough, it may well be "dirty"
1024                  * or even "starving". */
1025                 wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
1026
1027                 /* mark current on-disk bitmap and activity log as unreliable */
1028                 prev_flags = md->flags;
1029                 md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
1030                 drbd_md_write(device, buffer);
1031
1032                 drbd_al_initialize(device, buffer);
1033
1034                 drbd_info(device, "Writing the whole bitmap, %s\n",
1035                          la_size_changed && md_moved ? "size changed and md moved" :
1036                          la_size_changed ? "size changed" : "md moved");
1037                 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
1038                 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
1039                                "size changed", BM_LOCKED_MASK);
1040
1041                 /* on-disk bitmap and activity log is authoritative again
1042                  * (unless there was an IO error meanwhile...) */
1043                 md->flags = prev_flags;
1044                 drbd_md_write(device, buffer);
1045
1046                 if (rs)
1047                         drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
1048                                   md->al_stripes, md->al_stripe_size_4k * 4);
1049         }
1050
1051         if (size > prev.last_agreed_sect)
1052                 rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
1053         if (size < prev.last_agreed_sect)
1054                 rv = DS_SHRUNK;
1055
1056         if (0) {
1057         err_out:
1058                 /* restore previous offset and sizes */
1059                 md->la_size_sect = prev.last_agreed_sect;
1060                 md->md_offset = prev.md_offset;
1061                 md->al_offset = prev.al_offset;
1062                 md->bm_offset = prev.bm_offset;
1063                 md->md_size_sect = prev.md_size_sect;
1064                 md->al_stripes = prev.al_stripes;
1065                 md->al_stripe_size_4k = prev.al_stripe_size_4k;
1066                 md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
1067         }
1068         lc_unlock(device->act_log);
1069         wake_up(&device->al_wait);
1070         drbd_md_put_buffer(device);
1071         drbd_resume_io(device);
1072
1073         return rv;
1074 }
1075
1076 sector_t
1077 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1078                   sector_t u_size, int assume_peer_has_space)
1079 {
1080         sector_t p_size = device->p_size;   /* partner's disk size. */
1081         sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1082         sector_t m_size; /* my size */
1083         sector_t size = 0;
1084
1085         m_size = drbd_get_max_capacity(bdev);
1086
1087         if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1088                 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1089                 p_size = m_size;
1090         }
1091
1092         if (p_size && m_size) {
1093                 size = min_t(sector_t, p_size, m_size);
1094         } else {
1095                 if (la_size_sect) {
1096                         size = la_size_sect;
1097                         if (m_size && m_size < size)
1098                                 size = m_size;
1099                         if (p_size && p_size < size)
1100                                 size = p_size;
1101                 } else {
1102                         if (m_size)
1103                                 size = m_size;
1104                         if (p_size)
1105                                 size = p_size;
1106                 }
1107         }
1108
1109         if (size == 0)
1110                 drbd_err(device, "Both nodes diskless!\n");
1111
1112         if (u_size) {
1113                 if (u_size > size)
1114                         drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1115                             (unsigned long)u_size>>1, (unsigned long)size>>1);
1116                 else
1117                         size = u_size;
1118         }
1119
1120         return size;
1121 }
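
/*
 * In short: if both the local and the peer size are known, take the minimum;
 * otherwise fall back to the last agreed size, bounded by whichever size is
 * known (with both unknown and no last agreed size, both nodes are diskless
 * and the result is 0).  A configured user size then overrides the result,
 * unless it exceeds what is available, in which case it is only reported as
 * an error.
 */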
1122
1123 /**
1124  * drbd_check_al_size() - Ensures that the AL is of the right size
1125  * @device:     DRBD device.
1126  *
1127  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1128  * failed, and 0 on success. You should call drbd_md_sync() after you called
1129  * this function.
1130  */
1131 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1132 {
1133         struct lru_cache *n, *t;
1134         struct lc_element *e;
1135         unsigned int in_use;
1136         int i;
1137
1138         if (device->act_log &&
1139             device->act_log->nr_elements == dc->al_extents)
1140                 return 0;
1141
1142         in_use = 0;
1143         t = device->act_log;
1144         n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1145                 dc->al_extents, sizeof(struct lc_element), 0);
1146
1147         if (n == NULL) {
1148                 drbd_err(device, "Cannot allocate act_log lru!\n");
1149                 return -ENOMEM;
1150         }
1151         spin_lock_irq(&device->al_lock);
1152         if (t) {
1153                 for (i = 0; i < t->nr_elements; i++) {
1154                         e = lc_element_by_index(t, i);
1155                         if (e->refcnt)
1156                                 drbd_err(device, "refcnt(%d)==%d\n",
1157                                     e->lc_number, e->refcnt);
1158                         in_use += e->refcnt;
1159                 }
1160         }
1161         if (!in_use)
1162                 device->act_log = n;
1163         spin_unlock_irq(&device->al_lock);
1164         if (in_use) {
1165                 drbd_err(device, "Activity log still in use!\n");
1166                 lc_destroy(n);
1167                 return -EBUSY;
1168         } else {
1169                 lc_destroy(t);
1170         }
1171         drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
1172         return 0;
1173 }
1174
1175 static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
1176 {
1177         q->limits.discard_granularity = granularity;
1178 }
1179
1180 static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
1181 {
1182         /* when we introduced REQ_WRITE_SAME support, we also bumped
1183          * our maximum supported batch bio size used for discards. */
1184         if (connection->agreed_features & DRBD_FF_WSAME)
1185                 return DRBD_MAX_BBIO_SECTORS;
1186         /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
1187         return AL_EXTENT_SIZE >> 9;
1188 }
1189
1190 static void decide_on_discard_support(struct drbd_device *device,
1191                         struct request_queue *q,
1192                         struct request_queue *b,
1193                         bool discard_zeroes_if_aligned)
1194 {
1195         /* q = drbd device queue (device->rq_queue)
1196          * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
1197          *     or NULL if diskless
1198          */
1199         struct drbd_connection *connection = first_peer_device(device)->connection;
1200         bool can_do = b ? blk_queue_discard(b) : true;
1201
1202         if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) {
1203                 can_do = false;
1204                 drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
1205         }
1206         if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
1207                 can_do = false;
1208                 drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
1209         }
1210         if (can_do) {
1211                 /* We don't care for the granularity, really.
1212                  * Stacking limits below should fix it for the local
1213                  * device.  Whether or not it is a suitable granularity
1214                  * on the remote device is not our problem, really. If
1215                  * you care, you need to use devices with similar
1216                  * topology on all peers. */
1217                 blk_queue_discard_granularity(q, 512);
1218                 q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
1219                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1220         } else {
1221                 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1222                 blk_queue_discard_granularity(q, 0);
1223                 q->limits.max_discard_sectors = 0;
1224         }
1225 }
1226
1227 static void fixup_discard_if_not_supported(struct request_queue *q)
1228 {
1229         /* To avoid confusion, if this queue does not support discard, clear
1230          * max_discard_sectors, which is what lsblk -D reports to the user.
1231          * Older kernels got this wrong in "stack limits".
1232          */
1233         if (!blk_queue_discard(q)) {
1234                 blk_queue_max_discard_sectors(q, 0);
1235                 blk_queue_discard_granularity(q, 0);
1236         }
1237 }
1238
1239 static void decide_on_write_same_support(struct drbd_device *device,
1240                         struct request_queue *q,
1241                         struct request_queue *b, struct o_qlim *o)
1242 {
1243         struct drbd_peer_device *peer_device = first_peer_device(device);
1244         struct drbd_connection *connection = peer_device->connection;
1245         bool can_do = b ? b->limits.max_write_same_sectors : true;
1246
1247         if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
1248                 can_do = false;
1249                 drbd_info(peer_device, "peer does not support WRITE_SAME\n");
1250         }
1251
1252         if (o) {
1253                 /* logical block size; queue_logical_block_size(NULL) is 512 */
1254                 unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
1255                 unsigned int me_lbs_b = queue_logical_block_size(b);
1256                 unsigned int me_lbs = queue_logical_block_size(q);
1257
1258                 if (me_lbs_b != me_lbs) {
1259                         drbd_warn(device,
1260                                 "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
1261                                 me_lbs, me_lbs_b);
1262                         /* rather disable write same than trigger some BUG_ON later in the scsi layer. */
1263                         can_do = false;
1264                 }
1265                 if (me_lbs_b != peer_lbs) {
1266                         drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
1267                                 me_lbs, peer_lbs);
1268                         if (can_do) {
1269                                 drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n");
1270                                 can_do = false;
1271                         }
1272                         me_lbs = max(me_lbs, me_lbs_b);
1273                         /* We cannot change the logical block size of an in-use queue.
1274                          * We can only hope that access happens to be properly aligned.
1275                          * If not, the peer will likely produce an IO error, and detach. */
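                        /* Illustrative case (not from this file): with a peer
                         * reporting 4096-byte logical blocks and a local queue
                         * still at 512, the branch below lets a Secondary bump
                         * its queue's logical block size to 4096, while a
                         * current Primary can only warn and hope for aligned IO. */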
1276                         if (peer_lbs > me_lbs) {
1277                                 if (device->state.role != R_PRIMARY) {
1278                                         blk_queue_logical_block_size(q, peer_lbs);
1279                                         drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs);
1280                                 } else {
1281                                         drbd_warn(peer_device,
1282                                                 "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",
1283                                                 me_lbs, peer_lbs);
1284                                 }
1285                         }
1286                 }
1287                 if (can_do && !o->write_same_capable) {
1288                         /* If we were to introduce an open-coded write-same loop on the
1289                          * receiving side, the peer would then present itself as "capable". */
1290                         drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n");
1291                         can_do = false;
1292                 }
1293         }
1294
1295         blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
1296 }
1297
1298 static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
1299                                    unsigned int max_bio_size, struct o_qlim *o)
1300 {
1301         struct request_queue * const q = device->rq_queue;
1302         unsigned int max_hw_sectors = max_bio_size >> 9;
1303         unsigned int max_segments = 0;
1304         struct request_queue *b = NULL;
1305         struct disk_conf *dc;
1306         bool discard_zeroes_if_aligned = true;
1307
1308         if (bdev) {
1309                 b = bdev->backing_bdev->bd_disk->queue;
1310
1311                 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1312                 rcu_read_lock();
1313                 dc = rcu_dereference(device->ldev->disk_conf);
1314                 max_segments = dc->max_bio_bvecs;
1315                 discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
1316                 rcu_read_unlock();
1317
1318                 blk_set_stacking_limits(&q->limits);
1319         }
1320
1321         blk_queue_max_hw_sectors(q, max_hw_sectors);
1322         /* This is the workaround for "bio would need to, but cannot, be split" */
1323         blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1324         blk_queue_segment_boundary(q, PAGE_SIZE-1);
1325         decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
1326         decide_on_write_same_support(device, q, b, o);
1327
1328         if (b) {
1329                 blk_queue_stack_limits(q, b);
1330
1331                 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1332                         drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1333                                  q->backing_dev_info.ra_pages,
1334                                  b->backing_dev_info.ra_pages);
1335                         q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1336                 }
1337         }
1338         fixup_discard_if_not_supported(q);
1339 }
1340
1341 void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
1342 {
1343         unsigned int now, new, local, peer;
1344
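        /* Note on units: the max_bio_size values handled here are in bytes,
         * while queue_max_hw_sectors() works in 512-byte sectors, hence the
         * << 9 / >> 9 conversions.  As a sketch (assuming DRBD_MAX_BIO_SIZE
         * is 1 MiB): 1 MiB >> 9 == 2048 sectors. */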
1345         now = queue_max_hw_sectors(device->rq_queue) << 9;
1346         local = device->local_max_bio_size; /* Possibly the last known value, from volatile memory */
1347         peer = device->peer_max_bio_size; /* Possibly the last known value, from meta data */
1348
1349         if (bdev) {
1350                 local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
1351                 device->local_max_bio_size = local;
1352         }
1353         local = min(local, DRBD_MAX_BIO_SIZE);
1354
1355         /* We may ignore peer limits if the peer is modern enough:
1356            from 8.3.8 onwards the peer can use multiple
1357            BIOs for a single peer_request. */
1358         if (device->state.conn >= C_WF_REPORT_PARAMS) {
1359                 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1360                         peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1361                         /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1362                 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1363                         peer = DRBD_MAX_SIZE_H80_PACKET;
1364                 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1365                         peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1366                 else
1367                         peer = DRBD_MAX_BIO_SIZE;
1368
1369                 /* We may later detach and re-attach on a disconnected Primary.
1370          * Avoid having this setting jump back in that case.
1371                  * We want to store what we know the peer DRBD can handle,
1372                  * not what the peer IO backend can handle. */
1373                 if (peer > device->peer_max_bio_size)
1374                         device->peer_max_bio_size = peer;
1375         }
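        /* For example (illustrative numbers): a local backend capable of
         * DRBD_MAX_BIO_SIZE but a peer still at protocol version 94 yields
         * new = min(DRBD_MAX_BIO_SIZE, DRBD_MAX_SIZE_H80_PACKET), i.e. the
         * 32 KiB limit of the old peer wins. */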
1376         new = min(local, peer);
1377
1378         if (device->state.role == R_PRIMARY && new < now)
1379                 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1380
1381         if (new != now)
1382                 drbd_info(device, "max BIO size = %u\n", new);
1383
1384         drbd_setup_queue_param(device, bdev, new, o);
1385 }
1386
1387 /* Starts the worker thread */
1388 static void conn_reconfig_start(struct drbd_connection *connection)
1389 {
1390         drbd_thread_start(&connection->worker);
1391         drbd_flush_workqueue(&connection->sender_work);
1392 }
1393
1394 /* if still unconfigured, stops worker again. */
1395 static void conn_reconfig_done(struct drbd_connection *connection)
1396 {
1397         bool stop_threads;
1398         spin_lock_irq(&connection->resource->req_lock);
1399         stop_threads = conn_all_vols_unconf(connection) &&
1400                 connection->cstate == C_STANDALONE;
1401         spin_unlock_irq(&connection->resource->req_lock);
1402         if (stop_threads) {
1403                 /* ack_receiver thread and ack_sender workqueue are implicitly
1404                  * stopped by receiver in conn_disconnect() */
1405                 drbd_thread_stop(&connection->receiver);
1406                 drbd_thread_stop(&connection->worker);
1407         }
1408 }
1409
1410 /* Make sure IO is suspended before calling this function. */
1411 static void drbd_suspend_al(struct drbd_device *device)
1412 {
1413         int s = 0;
1414
1415         if (!lc_try_lock(device->act_log)) {
1416                 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1417                 return;
1418         }
1419
1420         drbd_al_shrink(device);
1421         spin_lock_irq(&device->resource->req_lock);
1422         if (device->state.conn < C_CONNECTED)
1423                 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1424         spin_unlock_irq(&device->resource->req_lock);
1425         lc_unlock(device->act_log);
1426
1427         if (s)
1428                 drbd_info(device, "Suspended AL updates\n");
1429 }
1430
1431
1432 static bool should_set_defaults(struct genl_info *info)
1433 {
1434         unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1435         return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1436 }
1437
1438 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1439 {
1440         /* This is limited by 16 bit "slot" numbers,
1441          * and by available on-disk context storage.
1442          *
1443          * Also (u16)~0 is special (denotes a "free" extent).
1444          *
1445          * One transaction occupies one 4kB on-disk block,
1446          * we have n such blocks in the on-disk ring buffer,
1447          * the "current" transaction may fail, so only (n-1) are reliably usable,
1448          * and there are 919 slot numbers of context information per transaction.
1449          *
1450          * 72 transaction blocks amounts to more than 2**16 context slots,
1451          * so cap there first.
1452          */
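        /* Worked example of the numbers above (assuming
         * AL_CONTEXT_PER_TRANSACTION == 919 and DRBD_AL_EXTENTS_MAX == 65534):
         * 72 * 919 == 66168 > 65536 == 2**16, and
         * (65534 + 919 - 1) / 919 == 72, so 72 on-disk transaction blocks are
         * already "sufficient_on_disk". */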
1453         const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1454         const unsigned int sufficient_on_disk =
1455                 (max_al_nr + AL_CONTEXT_PER_TRANSACTION - 1)
1456                 / AL_CONTEXT_PER_TRANSACTION;
1457
1458         unsigned int al_size_4k = bdev->md.al_size_4k;
1459
1460         if (al_size_4k > sufficient_on_disk)
1461                 return max_al_nr;
1462
1463         return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1464 }
1465
1466 static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1467 {
1468         return  a->disk_barrier != b->disk_barrier ||
1469                 a->disk_flushes != b->disk_flushes ||
1470                 a->disk_drain != b->disk_drain;
1471 }
1472
1473 static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
1474                                struct drbd_backing_dev *nbc)
1475 {
1476         struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;
1477
1478         if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1479                 disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1480         if (disk_conf->al_extents > drbd_al_extents_max(nbc))
1481                 disk_conf->al_extents = drbd_al_extents_max(nbc);
1482
1483         if (!blk_queue_discard(q)
1484             || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) {
1485                 if (disk_conf->rs_discard_granularity) {
1486                         disk_conf->rs_discard_granularity = 0; /* disable feature */
1487                         drbd_info(device, "rs_discard_granularity feature disabled\n");
1488                 }
1489         }
1490
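        /* If resync discards are requested, adapt rs_discard_granularity to
         * the backing device: raise it to at least the backend's discard
         * granularity, bias it towards a multiple of that granularity, and
         * cap it at the backend's max_discard_sectors (converted to bytes). */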
1491         if (disk_conf->rs_discard_granularity) {
1492                 int orig_value = disk_conf->rs_discard_granularity;
1493                 int remainder;
1494
1495                 if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
1496                         disk_conf->rs_discard_granularity = q->limits.discard_granularity;
1497
1498                 remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
1499                 disk_conf->rs_discard_granularity += remainder;
1500
1501                 if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
1502                         disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;
1503
1504                 if (disk_conf->rs_discard_granularity != orig_value)
1505                         drbd_info(device, "rs_discard_granularity changed to %d\n",
1506                                   disk_conf->rs_discard_granularity);
1507         }
1508 }
1509
1510 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1511 {
1512         struct drbd_config_context adm_ctx;
1513         enum drbd_ret_code retcode;
1514         struct drbd_device *device;
1515         struct disk_conf *new_disk_conf, *old_disk_conf;
1516         struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1517         int err, fifo_size;
1518
1519         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1520         if (!adm_ctx.reply_skb)
1521                 return retcode;
1522         if (retcode != NO_ERROR)
1523                 goto finish;
1524
1525         device = adm_ctx.device;
1526         mutex_lock(&adm_ctx.resource->adm_mutex);
1527
1528         /* we also need a disk
1529          * to change the options on */
1530         if (!get_ldev(device)) {
1531                 retcode = ERR_NO_DISK;
1532                 goto out;
1533         }
1534
1535         new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1536         if (!new_disk_conf) {
1537                 retcode = ERR_NOMEM;
1538                 goto fail;
1539         }
1540
1541         mutex_lock(&device->resource->conf_update);
1542         old_disk_conf = device->ldev->disk_conf;
1543         *new_disk_conf = *old_disk_conf;
1544         if (should_set_defaults(info))
1545                 set_disk_conf_defaults(new_disk_conf);
1546
1547         err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1548         if (err && err != -ENOMSG) {
1549                 retcode = ERR_MANDATORY_TAG;
1550                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1551                 goto fail_unlock;
1552         }
1553
1554         if (!expect(new_disk_conf->resync_rate >= 1))
1555                 new_disk_conf->resync_rate = 1;
1556
1557         sanitize_disk_conf(device, new_disk_conf, device->ldev);
1558
1559         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1560                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1561
1562         fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
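        /* fifo_size sketch (assuming SLEEP_TIME == HZ/10): c_plan_ahead is
         * configured in 0.1 second units and the resync controller samples
         * once per SLEEP_TIME, so this reduces to roughly one fifo slot per
         * 0.1s of plan-ahead, e.g. c_plan_ahead == 20 (2 seconds) -> 20 slots. */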
1563         if (fifo_size != device->rs_plan_s->size) {
1564                 new_plan = fifo_alloc(fifo_size);
1565                 if (!new_plan) {
1566                         drbd_err(device, "kmalloc of fifo_buffer failed\n");
1567                         retcode = ERR_NOMEM;
1568                         goto fail_unlock;
1569                 }
1570         }
1571
1572         drbd_suspend_io(device);
1573         wait_event(device->al_wait, lc_try_lock(device->act_log));
1574         drbd_al_shrink(device);
1575         err = drbd_check_al_size(device, new_disk_conf);
1576         lc_unlock(device->act_log);
1577         wake_up(&device->al_wait);
1578         drbd_resume_io(device);
1579
1580         if (err) {
1581                 retcode = ERR_NOMEM;
1582                 goto fail_unlock;
1583         }
1584
1585         lock_all_resources();
1586         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1587         if (retcode == NO_ERROR) {
1588                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1589                 drbd_resync_after_changed(device);
1590         }
1591         unlock_all_resources();
1592
1593         if (retcode != NO_ERROR)
1594                 goto fail_unlock;
1595
1596         if (new_plan) {
1597                 old_plan = device->rs_plan_s;
1598                 rcu_assign_pointer(device->rs_plan_s, new_plan);
1599         }
1600
1601         mutex_unlock(&device->resource->conf_update);
1602
1603         if (new_disk_conf->al_updates)
1604                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1605         else
1606                 device->ldev->md.flags |= MDF_AL_DISABLED;
1607
1608         if (new_disk_conf->md_flushes)
1609                 clear_bit(MD_NO_FUA, &device->flags);
1610         else
1611                 set_bit(MD_NO_FUA, &device->flags);
1612
1613         if (write_ordering_changed(old_disk_conf, new_disk_conf))
1614                 drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
1615
1616         if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
1617                 drbd_reconsider_queue_parameters(device, device->ldev, NULL);
1618
1619         drbd_md_sync(device);
1620
1621         if (device->state.conn >= C_CONNECTED) {
1622                 struct drbd_peer_device *peer_device;
1623
1624                 for_each_peer_device(peer_device, device)
1625                         drbd_send_sync_param(peer_device);
1626         }
1627
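        /* Classic RCU update pattern: the new disk_conf / plan fifo were
         * published above via rcu_assign_pointer(); wait for all pre-existing
         * readers (rcu_dereference() users under rcu_read_lock()) to finish
         * before freeing the old objects. */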
1628         synchronize_rcu();
1629         kfree(old_disk_conf);
1630         kfree(old_plan);
1631         mod_timer(&device->request_timer, jiffies + HZ);
1632         goto success;
1633
1634 fail_unlock:
1635         mutex_unlock(&device->resource->conf_update);
1636  fail:
1637         kfree(new_disk_conf);
1638         kfree(new_plan);
1639 success:
1640         put_ldev(device);
1641  out:
1642         mutex_unlock(&adm_ctx.resource->adm_mutex);
1643  finish:
1644         drbd_adm_finish(&adm_ctx, info, retcode);
1645         return 0;
1646 }
1647
1648 static struct block_device *open_backing_dev(struct drbd_device *device,
1649                 const char *bdev_path, void *claim_ptr, bool do_bd_link)
1650 {
1651         struct block_device *bdev;
1652         int err = 0;
1653
1654         bdev = blkdev_get_by_path(bdev_path,
1655                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
1656         if (IS_ERR(bdev)) {
1657                 drbd_err(device, "open(\"%s\") failed with %ld\n",
1658                                 bdev_path, PTR_ERR(bdev));
1659                 return bdev;
1660         }
1661
1662         if (!do_bd_link)
1663                 return bdev;
1664
1665         err = bd_link_disk_holder(bdev, device->vdisk);
1666         if (err) {
1667                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1668                 drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
1669                                 bdev_path, err);
1670                 bdev = ERR_PTR(err);
1671         }
1672         return bdev;
1673 }
1674
1675 static int open_backing_devices(struct drbd_device *device,
1676                 struct disk_conf *new_disk_conf,
1677                 struct drbd_backing_dev *nbc)
1678 {
1679         struct block_device *bdev;
1680
1681         bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
1682         if (IS_ERR(bdev))
1683                 return ERR_OPEN_DISK;
1684         nbc->backing_bdev = bdev;
1685
1686         /*
1687          * meta_dev_idx >= 0: external fixed size, possibly multiple
1688          * drbd sharing one meta device.  TODO in that case, paranoia
1689          * check that [md_bdev, meta_dev_idx] is not yet used by some
1690          * other drbd minor!  (if you use drbd.conf + drbdadm, that
1691          * should check it for you already; but if you don't, or
1692          * someone fooled it, we need to double check here)
1693          */
1694         bdev = open_backing_dev(device, new_disk_conf->meta_dev,
1695                 /* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
1696                  * if potentially shared with other drbd minors */
1697                         (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
1698                 /* avoid double bd_claim_by_disk() for the same (source,target) tuple,
1699                  * as would happen with internal metadata. */
1700                         (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
1701                          new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
1702         if (IS_ERR(bdev))
1703                 return ERR_OPEN_MD_DISK;
1704         nbc->md_bdev = bdev;
1705         return NO_ERROR;
1706 }
1707
1708 static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
1709         bool do_bd_unlink)
1710 {
1711         if (!bdev)
1712                 return;
1713         if (do_bd_unlink)
1714                 bd_unlink_disk_holder(bdev, device->vdisk);
1715         blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1716 }
1717
1718 void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
1719 {
1720         if (ldev == NULL)
1721                 return;
1722
1723         close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
1724         close_backing_dev(device, ldev->backing_bdev, true);
1725
1726         kfree(ldev->disk_conf);
1727         kfree(ldev);
1728 }
1729
1730 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1731 {
1732         struct drbd_config_context adm_ctx;
1733         struct drbd_device *device;
1734         struct drbd_peer_device *peer_device;
1735         struct drbd_connection *connection;
1736         int err;
1737         enum drbd_ret_code retcode;
1738         enum determine_dev_size dd;
1739         sector_t max_possible_sectors;
1740         sector_t min_md_device_sectors;
1741         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1742         struct disk_conf *new_disk_conf = NULL;
1743         struct lru_cache *resync_lru = NULL;
1744         struct fifo_buffer *new_plan = NULL;
1745         union drbd_state ns, os;
1746         enum drbd_state_rv rv;
1747         struct net_conf *nc;
1748
1749         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1750         if (!adm_ctx.reply_skb)
1751                 return retcode;
1752         if (retcode != NO_ERROR)
1753                 goto finish;
1754
1755         device = adm_ctx.device;
1756         mutex_lock(&adm_ctx.resource->adm_mutex);
1757         peer_device = first_peer_device(device);
1758         connection = peer_device->connection;
1759         conn_reconfig_start(connection);
1760
1761         /* if you want to reconfigure, please tear down first */
1762         if (device->state.disk > D_DISKLESS) {
1763                 retcode = ERR_DISK_CONFIGURED;
1764                 goto fail;
1765         }
1766         /* It may just now have detached because of IO error.  Make sure
1767          * drbd_ldev_destroy is done already; we may end up here very fast,
1768          * e.g. if someone calls attach from the on-io-error handler,
1769          * to realize a "hot spare" feature (not that I'd recommend that) */
1770         wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
1771
1772         /* make sure there is no leftover from previous force-detach attempts */
1773         clear_bit(FORCE_DETACH, &device->flags);
1774         clear_bit(WAS_IO_ERROR, &device->flags);
1775         clear_bit(WAS_READ_ERROR, &device->flags);
1776
1777         /* and no leftover from previously aborted resync or verify, either */
1778         device->rs_total = 0;
1779         device->rs_failed = 0;
1780         atomic_set(&device->rs_pending_cnt, 0);
1781
1782         /* allocation not in the IO path, drbdsetup context */
1783         nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1784         if (!nbc) {
1785                 retcode = ERR_NOMEM;
1786                 goto fail;
1787         }
1788         spin_lock_init(&nbc->md.uuid_lock);
1789
1790         new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1791         if (!new_disk_conf) {
1792                 retcode = ERR_NOMEM;
1793                 goto fail;
1794         }
1795         nbc->disk_conf = new_disk_conf;
1796
1797         set_disk_conf_defaults(new_disk_conf);
1798         err = disk_conf_from_attrs(new_disk_conf, info);
1799         if (err) {
1800                 retcode = ERR_MANDATORY_TAG;
1801                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1802                 goto fail;
1803         }
1804
1805         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1806                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1807
1808         new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1809         if (!new_plan) {
1810                 retcode = ERR_NOMEM;
1811                 goto fail;
1812         }
1813
1814         if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1815                 retcode = ERR_MD_IDX_INVALID;
1816                 goto fail;
1817         }
1818
1819         rcu_read_lock();
1820         nc = rcu_dereference(connection->net_conf);
1821         if (nc) {
1822                 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1823                         rcu_read_unlock();
1824                         retcode = ERR_STONITH_AND_PROT_A;
1825                         goto fail;
1826                 }
1827         }
1828         rcu_read_unlock();
1829
1830         retcode = open_backing_devices(device, new_disk_conf, nbc);
1831         if (retcode != NO_ERROR)
1832                 goto fail;
1833
1834         if ((nbc->backing_bdev == nbc->md_bdev) !=
1835             (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1836              new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1837                 retcode = ERR_MD_IDX_INVALID;
1838                 goto fail;
1839         }
1840
1841         resync_lru = lc_create("resync", drbd_bm_ext_cache,
1842                         1, 61, sizeof(struct bm_extent),
1843                         offsetof(struct bm_extent, lce));
1844         if (!resync_lru) {
1845                 retcode = ERR_NOMEM;
1846                 goto fail;
1847         }
1848
1849         /* Read our meta data super block early.
1850          * This also sets other on-disk offsets. */
1851         retcode = drbd_md_read(device, nbc);
1852         if (retcode != NO_ERROR)
1853                 goto fail;
1854
1855         sanitize_disk_conf(device, new_disk_conf, nbc);
1856
1857         if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1858                 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1859                         (unsigned long long) drbd_get_max_capacity(nbc),
1860                         (unsigned long long) new_disk_conf->disk_size);
1861                 retcode = ERR_DISK_TOO_SMALL;
1862                 goto fail;
1863         }
1864
1865         if (new_disk_conf->meta_dev_idx < 0) {
1866                 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1867                 /* at least one MB, otherwise it does not make sense */
1868                 min_md_device_sectors = (2<<10);
1869         } else {
1870                 max_possible_sectors = DRBD_MAX_SECTORS;
1871                 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1872         }
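        /* Size sketch (in 512-byte sectors): 2<<10 == 2048 sectors == 1 MiB for
         * internal/flexible meta data.  For a fixed external index, each slot
         * needs MD_128MB_SECT (presumably 128 MiB worth of sectors), hence the
         * multiplication by (meta_dev_idx + 1). */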
1873
1874         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1875                 retcode = ERR_MD_DISK_TOO_SMALL;
1876                 drbd_warn(device, "refusing attach: md-device too small, "
1877                      "at least %llu sectors needed for this meta-disk type\n",
1878                      (unsigned long long) min_md_device_sectors);
1879                 goto fail;
1880         }
1881
1882         /* Make sure the new disk is big enough
1883          * (we may currently be R_PRIMARY with no local disk...) */
1884         if (drbd_get_max_capacity(nbc) <
1885             drbd_get_capacity(device->this_bdev)) {
1886                 retcode = ERR_DISK_TOO_SMALL;
1887                 goto fail;
1888         }
1889
1890         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1891
1892         if (nbc->known_size > max_possible_sectors) {
1893                 drbd_warn(device, "==> truncating very big lower level device "
1894                         "to currently maximum possible %llu sectors <==\n",
1895                         (unsigned long long) max_possible_sectors);
1896                 if (new_disk_conf->meta_dev_idx >= 0)
1897                         drbd_warn(device, "==>> using internal or flexible "
1898                                       "meta data may help <<==\n");
1899         }
1900
1901         drbd_suspend_io(device);
1902         /* also wait for the last barrier ack. */
1903         /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1904          * We need a way to either ignore barrier acks for barriers sent before a device
1905          * was attached, or a way to wait for all pending barrier acks to come in.
1906          * As barriers are counted per resource,
1907          * we'd need to suspend io on all devices of a resource.
1908          */
1909         wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1910         /* and for any other previously queued work */
1911         drbd_flush_workqueue(&connection->sender_work);
1912
1913         rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1914         retcode = rv;  /* FIXME: Type mismatch. */
1915         drbd_resume_io(device);
1916         if (rv < SS_SUCCESS)
1917                 goto fail;
1918
1919         if (!get_ldev_if_state(device, D_ATTACHING))
1920                 goto force_diskless;
1921
1922         if (!device->bitmap) {
1923                 if (drbd_bm_init(device)) {
1924                         retcode = ERR_NOMEM;
1925                         goto force_diskless_dec;
1926                 }
1927         }
1928
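        /* Bit 0 of the current UUID is used as a role indicator (it is set
         * while Primary, see the UI_CURRENT update further down), so it is
         * masked out before comparing the exposed-data UUID with the on-disk one. */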
1929         if (device->state.conn < C_CONNECTED &&
1930             device->state.role == R_PRIMARY && device->ed_uuid &&
1931             (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1932                 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1933                     (unsigned long long)device->ed_uuid);
1934                 retcode = ERR_DATA_NOT_CURRENT;
1935                 goto force_diskless_dec;
1936         }
1937
1938         /* Since we are diskless, fix the activity log first... */
1939         if (drbd_check_al_size(device, new_disk_conf)) {
1940                 retcode = ERR_NOMEM;
1941                 goto force_diskless_dec;
1942         }
1943
1944         /* Prevent shrinking of consistent devices ! */
1945         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1946             drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1947                 drbd_warn(device, "refusing to truncate a consistent device\n");
1948                 retcode = ERR_DISK_TOO_SMALL;
1949                 goto force_diskless_dec;
1950         }
1951
1952         lock_all_resources();
1953         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1954         if (retcode != NO_ERROR) {
1955                 unlock_all_resources();
1956                 goto force_diskless_dec;
1957         }
1958
1959         /* Reset the "barriers don't work" bits here, then force meta data to
1960          * be written, to ensure we determine if barriers are supported. */
1961         if (new_disk_conf->md_flushes)
1962                 clear_bit(MD_NO_FUA, &device->flags);
1963         else
1964                 set_bit(MD_NO_FUA, &device->flags);
1965
1966         /* Point of no return reached.
1967          * Devices and memory are no longer released by error cleanup below.
1968          * From now on the device takes over responsibility, and the state engine should
1969          * clean it up somewhere.  */
1970         D_ASSERT(device, device->ldev == NULL);
1971         device->ldev = nbc;
1972         device->resync = resync_lru;
1973         device->rs_plan_s = new_plan;
1974         nbc = NULL;
1975         resync_lru = NULL;
1976         new_disk_conf = NULL;
1977         new_plan = NULL;
1978
1979         drbd_resync_after_changed(device);
1980         drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
1981         unlock_all_resources();
1982
1983         if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1984                 set_bit(CRASHED_PRIMARY, &device->flags);
1985         else
1986                 clear_bit(CRASHED_PRIMARY, &device->flags);
1987
1988         if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1989             !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1990                 set_bit(CRASHED_PRIMARY, &device->flags);
1991
1992         device->send_cnt = 0;
1993         device->recv_cnt = 0;
1994         device->read_cnt = 0;
1995         device->writ_cnt = 0;
1996
1997         drbd_reconsider_queue_parameters(device, device->ldev, NULL);
1998
1999         /* If I am currently not R_PRIMARY,
2000          * but meta data primary indicator is set,
2001          * I just now recover from a hard crash,
2002          * and have been R_PRIMARY before that crash.
2003          *
2004          * Now, if I had no connection before that crash
2005          * (have been degraded R_PRIMARY), chances are that
2006          * I won't find my peer now either.
2007          *
2008          * In that case, and _only_ in that case,
2009          * we use the degr-wfc-timeout instead of the default,
2010          * so we can automatically recover from a crash of a
2011          * degraded but active "cluster" after a certain timeout.
2012          */
2013         clear_bit(USE_DEGR_WFC_T, &device->flags);
2014         if (device->state.role != R_PRIMARY &&
2015              drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
2016             !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
2017                 set_bit(USE_DEGR_WFC_T, &device->flags);
2018
2019         dd = drbd_determine_dev_size(device, 0, NULL);
2020         if (dd <= DS_ERROR) {
2021                 retcode = ERR_NOMEM_BITMAP;
2022                 goto force_diskless_dec;
2023         } else if (dd == DS_GREW)
2024                 set_bit(RESYNC_AFTER_NEG, &device->flags);
2025
2026         if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
2027             (test_bit(CRASHED_PRIMARY, &device->flags) &&
2028              drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
2029                 drbd_info(device, "Assuming that all blocks are out of sync "
2030                      "(aka FullSync)\n");
2031                 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2032                         "set_n_write from attaching", BM_LOCKED_MASK)) {
2033                         retcode = ERR_IO_MD_DISK;
2034                         goto force_diskless_dec;
2035                 }
2036         } else {
2037                 if (drbd_bitmap_io(device, &drbd_bm_read,
2038                         "read from attaching", BM_LOCKED_MASK)) {
2039                         retcode = ERR_IO_MD_DISK;
2040                         goto force_diskless_dec;
2041                 }
2042         }
2043
2044         if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
2045                 drbd_suspend_al(device); /* IO is still suspended here... */
2046
2047         spin_lock_irq(&device->resource->req_lock);
2048         os = drbd_read_state(device);
2049         ns = os;
2050         /* If MDF_CONSISTENT is not set, go into D_INCONSISTENT state,
2051            otherwise investigate MDF_WAS_UP_TO_DATE...
2052            If MDF_WAS_UP_TO_DATE is not set, go into D_OUTDATED disk state,
2053            otherwise into D_CONSISTENT state.
2054         */
2055         if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
2056                 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
2057                         ns.disk = D_CONSISTENT;
2058                 else
2059                         ns.disk = D_OUTDATED;
2060         } else {
2061                 ns.disk = D_INCONSISTENT;
2062         }
2063
2064         if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
2065                 ns.pdsk = D_OUTDATED;
2066
2067         rcu_read_lock();
2068         if (ns.disk == D_CONSISTENT &&
2069             (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
2070                 ns.disk = D_UP_TO_DATE;
2071
2072         /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
2073            MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
2074            this point, because drbd_request_state() modifies these
2075            flags. */
2076
2077         if (rcu_dereference(device->ldev->disk_conf)->al_updates)
2078                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
2079         else
2080                 device->ldev->md.flags |= MDF_AL_DISABLED;
2081
2082         rcu_read_unlock();
2083
2084         /* In case we are C_CONNECTED, postpone any decision on the new disk
2085            state until after the negotiation phase. */
2086         if (device->state.conn == C_CONNECTED) {
2087                 device->new_state_tmp.i = ns.i;
2088                 ns.i = os.i;
2089                 ns.disk = D_NEGOTIATING;
2090
2091                 /* We expect to receive up-to-date UUIDs soon.
2092                    To avoid a race in receive_state, free p_uuid while
2093                    holding req_lock. I.e. atomic with the state change */
2094                 kfree(device->p_uuid);
2095                 device->p_uuid = NULL;
2096         }
2097
2098         rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
2099         spin_unlock_irq(&device->resource->req_lock);
2100
2101         if (rv < SS_SUCCESS)
2102                 goto force_diskless_dec;
2103
2104         mod_timer(&device->request_timer, jiffies + HZ);
2105
2106         if (device->state.role == R_PRIMARY)
2107                 device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
2108         else
2109                 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
2110
2111         drbd_md_mark_dirty(device);
2112         drbd_md_sync(device);
2113
2114         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
2115         put_ldev(device);
2116         conn_reconfig_done(connection);
2117         mutex_unlock(&adm_ctx.resource->adm_mutex);
2118         drbd_adm_finish(&adm_ctx, info, retcode);
2119         return 0;
2120
2121  force_diskless_dec:
2122         put_ldev(device);
2123  force_diskless:
2124         drbd_force_state(device, NS(disk, D_DISKLESS));
2125         drbd_md_sync(device);
2126  fail:
2127         conn_reconfig_done(connection);
2128         if (nbc) {
2129                 close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
2130                 close_backing_dev(device, nbc->backing_bdev, true);
2131                 kfree(nbc);
2132         }
2133         kfree(new_disk_conf);
2134         lc_destroy(resync_lru);
2135         kfree(new_plan);
2136         mutex_unlock(&adm_ctx.resource->adm_mutex);
2137  finish:
2138         drbd_adm_finish(&adm_ctx, info, retcode);
2139         return 0;
2140 }
2141
2142 static int adm_detach(struct drbd_device *device, int force)
2143 {
2144         enum drbd_state_rv retcode;
2145         void *buffer;
2146         int ret;
2147
2148         if (force) {
2149                 set_bit(FORCE_DETACH, &device->flags);
2150                 drbd_force_state(device, NS(disk, D_FAILED));
2151                 retcode = SS_SUCCESS;
2152                 goto out;
2153         }
2154
2155         drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
2156         buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
2157         if (buffer) {
2158                 retcode = drbd_request_state(device, NS(disk, D_FAILED));
2159                 drbd_md_put_buffer(device);
2160         } else /* already <= D_FAILED */
2161                 retcode = SS_NOTHING_TO_DO;
2162         /* D_FAILED will transition to DISKLESS. */
2163         drbd_resume_io(device);
2164         ret = wait_event_interruptible(device->misc_wait,
2165                         device->state.disk != D_FAILED);
2166         if ((int)retcode == (int)SS_IS_DISKLESS)
2167                 retcode = SS_NOTHING_TO_DO;
2168         if (ret)
2169                 retcode = ERR_INTR;
2170 out:
2171         return retcode;
2172 }
2173
2174 /* Detaching the disk is a process in multiple stages.  First we need to lock
2175  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
2176  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
2177  * internal references as well.
2178  * Only then have we finally detached. */
2179 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
2180 {
2181         struct drbd_config_context adm_ctx;
2182         enum drbd_ret_code retcode;
2183         struct detach_parms parms = { };
2184         int err;
2185
2186         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2187         if (!adm_ctx.reply_skb)
2188                 return retcode;
2189         if (retcode != NO_ERROR)
2190                 goto out;
2191
2192         if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
2193                 err = detach_parms_from_attrs(&parms, info);
2194                 if (err) {
2195                         retcode = ERR_MANDATORY_TAG;
2196                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2197                         goto out;
2198                 }
2199         }
2200
2201         mutex_lock(&adm_ctx.resource->adm_mutex);
2202         retcode = adm_detach(adm_ctx.device, parms.force_detach);
2203         mutex_unlock(&adm_ctx.resource->adm_mutex);
2204 out:
2205         drbd_adm_finish(&adm_ctx, info, retcode);
2206         return 0;
2207 }
2208
2209 static bool conn_resync_running(struct drbd_connection *connection)
2210 {
2211         struct drbd_peer_device *peer_device;
2212         bool rv = false;
2213         int vnr;
2214
2215         rcu_read_lock();
2216         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2217                 struct drbd_device *device = peer_device->device;
2218                 if (device->state.conn == C_SYNC_SOURCE ||
2219                     device->state.conn == C_SYNC_TARGET ||
2220                     device->state.conn == C_PAUSED_SYNC_S ||
2221                     device->state.conn == C_PAUSED_SYNC_T) {
2222                         rv = true;
2223                         break;
2224                 }
2225         }
2226         rcu_read_unlock();
2227
2228         return rv;
2229 }
2230
2231 static bool conn_ov_running(struct drbd_connection *connection)
2232 {
2233         struct drbd_peer_device *peer_device;
2234         bool rv = false;
2235         int vnr;
2236
2237         rcu_read_lock();
2238         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2239                 struct drbd_device *device = peer_device->device;
2240                 if (device->state.conn == C_VERIFY_S ||
2241                     device->state.conn == C_VERIFY_T) {
2242                         rv = true;
2243                         break;
2244                 }
2245         }
2246         rcu_read_unlock();
2247
2248         return rv;
2249 }
2250
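/* Sanity checks of a new net_conf against the current connection state:
 * wire_protocol, two_primaries and integrity_alg may not change towards an
 * old peer (agreed_pro_version < 100) while in C_WF_REPORT_PARAMS;
 * disabling two_primaries is refused while both nodes are Primary;
 * two_primaries requires DRBD_PROT_C; DRBD_PROT_A is incompatible with
 * FP_STONITH fencing; discard_my_data is refused on a current Primary; and
 * on_congestion other than OC_BLOCK requires DRBD_PROT_A. */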
2251 static enum drbd_ret_code
2252 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
2253 {
2254         struct drbd_peer_device *peer_device;
2255         int i;
2256
2257         if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
2258                 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2259                         return ERR_NEED_APV_100;
2260
2261                 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2262                         return ERR_NEED_APV_100;
2263
2264                 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2265                         return ERR_NEED_APV_100;
2266         }
2267
2268         if (!new_net_conf->two_primaries &&
2269             conn_highest_role(connection) == R_PRIMARY &&
2270             conn_highest_peer(connection) == R_PRIMARY)
2271                 return ERR_NEED_ALLOW_TWO_PRI;
2272
2273         if (new_net_conf->two_primaries &&
2274             (new_net_conf->wire_protocol != DRBD_PROT_C))
2275                 return ERR_NOT_PROTO_C;
2276
2277         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2278                 struct drbd_device *device = peer_device->device;
2279                 if (get_ldev(device)) {
2280                         enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2281                         put_ldev(device);
2282                         if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2283                                 return ERR_STONITH_AND_PROT_A;
2284                 }
2285                 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2286                         return ERR_DISCARD_IMPOSSIBLE;
2287         }
2288
2289         if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2290                 return ERR_CONG_NOT_PROTO_A;
2291
2292         return NO_ERROR;
2293 }
2294
2295 static enum drbd_ret_code
2296 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2297 {
2298         static enum drbd_ret_code rv;
2299         struct drbd_peer_device *peer_device;
2300         int i;
2301
2302         rcu_read_lock();
2303         rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2304         rcu_read_unlock();
2305
2306         /* connection->peer_devices protected by genl_lock() here */
2307         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2308                 struct drbd_device *device = peer_device->device;
2309                 if (!device->bitmap) {
2310                         if (drbd_bm_init(device))
2311                                 return ERR_NOMEM;
2312                 }
2313         }
2314
2315         return rv;
2316 }
2317
2318 struct crypto {
2319         struct crypto_ahash *verify_tfm;
2320         struct crypto_ahash *csums_tfm;
2321         struct crypto_shash *cram_hmac_tfm;
2322         struct crypto_ahash *integrity_tfm;
2323 };
2324
2325 static int
2326 alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
2327 {
2328         if (!tfm_name[0])
2329                 return NO_ERROR;
2330
2331         *tfm = crypto_alloc_shash(tfm_name, 0, 0);
2332         if (IS_ERR(*tfm)) {
2333                 *tfm = NULL;
2334                 return err_alg;
2335         }
2336
2337         return NO_ERROR;
2338 }
2339
2340 static int
2341 alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg)
2342 {
2343         if (!tfm_name[0])
2344                 return NO_ERROR;
2345
2346         *tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2347         if (IS_ERR(*tfm)) {
2348                 *tfm = NULL;
2349                 return err_alg;
2350         }
2351
2352         return NO_ERROR;
2353 }
2354
2355 static enum drbd_ret_code
2356 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2357 {
2358         char hmac_name[CRYPTO_MAX_ALG_NAME];
2359         enum drbd_ret_code rv;
2360
2361         rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg,
2362                          ERR_CSUMS_ALG);
2363         if (rv != NO_ERROR)
2364                 return rv;
2365         rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg,
2366                          ERR_VERIFY_ALG);
2367         if (rv != NO_ERROR)
2368                 return rv;
2369         rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2370                          ERR_INTEGRITY_ALG);
2371         if (rv != NO_ERROR)
2372                 return rv;
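        /* For example (illustrative value): cram_hmac_alg "sha1" becomes the
         * crypto API template name "hmac(sha1)" below. */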
2373         if (new_net_conf->cram_hmac_alg[0] != 0) {
2374                 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2375                          new_net_conf->cram_hmac_alg);
2376
2377                 rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name,
2378                                  ERR_AUTH_ALG);
2379         }
2380
2381         return rv;
2382 }
2383
2384 static void free_crypto(struct crypto *crypto)
2385 {
2386         crypto_free_shash(crypto->cram_hmac_tfm);
2387         crypto_free_ahash(crypto->integrity_tfm);
2388         crypto_free_ahash(crypto->csums_tfm);
2389         crypto_free_ahash(crypto->verify_tfm);
2390 }
2391
2392 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2393 {
2394         struct drbd_config_context adm_ctx;
2395         enum drbd_ret_code retcode;
2396         struct drbd_connection *connection;
2397         struct net_conf *old_net_conf, *new_net_conf = NULL;
2398         int err;
2399         int ovr; /* online verify running */
2400         int rsr; /* re-sync running */
2401         struct crypto crypto = { };
2402
2403         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2404         if (!adm_ctx.reply_skb)
2405                 return retcode;
2406         if (retcode != NO_ERROR)
2407                 goto finish;
2408
2409         connection = adm_ctx.connection;
2410         mutex_lock(&adm_ctx.resource->adm_mutex);
2411
2412         new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2413         if (!new_net_conf) {
2414                 retcode = ERR_NOMEM;
2415                 goto out;
2416         }
2417
2418         conn_reconfig_start(connection);
2419
2420         mutex_lock(&connection->data.mutex);
2421         mutex_lock(&connection->resource->conf_update);
2422         old_net_conf = connection->net_conf;
2423
2424         if (!old_net_conf) {
2425                 drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2426                 retcode = ERR_INVALID_REQUEST;
2427                 goto fail;
2428         }
2429
2430         *new_net_conf = *old_net_conf;
2431         if (should_set_defaults(info))
2432                 set_net_conf_defaults(new_net_conf);
2433
2434         err = net_conf_from_attrs_for_change(new_net_conf, info);
2435         if (err && err != -ENOMSG) {
2436                 retcode = ERR_MANDATORY_TAG;
2437                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2438                 goto fail;
2439         }
2440
2441         retcode = check_net_options(connection, new_net_conf);
2442         if (retcode != NO_ERROR)
2443                 goto fail;
2444
2445         /* re-sync running */
2446         rsr = conn_resync_running(connection);
2447         if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2448                 retcode = ERR_CSUMS_RESYNC_RUNNING;
2449                 goto fail;
2450         }
2451
2452         /* online verify running */
2453         ovr = conn_ov_running(connection);
2454         if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2455                 retcode = ERR_VERIFY_RUNNING;
2456                 goto fail;
2457         }
2458
2459         retcode = alloc_crypto(&crypto, new_net_conf);
2460         if (retcode != NO_ERROR)
2461                 goto fail;
2462
2463         rcu_assign_pointer(connection->net_conf, new_net_conf);
2464
2465         if (!rsr) {
2466                 crypto_free_ahash(connection->csums_tfm);
2467                 connection->csums_tfm = crypto.csums_tfm;
2468                 crypto.csums_tfm = NULL;
2469         }
2470         if (!ovr) {
2471                 crypto_free_ahash(connection->verify_tfm);
2472                 connection->verify_tfm = crypto.verify_tfm;
2473                 crypto.verify_tfm = NULL;
2474         }
2475
2476         crypto_free_ahash(connection->integrity_tfm);
2477         connection->integrity_tfm = crypto.integrity_tfm;
2478         if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2479                 /* Do this without trying to take connection->data.mutex again.  */
2480                 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2481
2482         crypto_free_shash(connection->cram_hmac_tfm);
2483         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2484
2485         mutex_unlock(&connection->resource->conf_update);
2486         mutex_unlock(&connection->data.mutex);
2487         synchronize_rcu();
2488         kfree(old_net_conf);
2489
2490         if (connection->cstate >= C_WF_REPORT_PARAMS) {
2491                 struct drbd_peer_device *peer_device;
2492                 int vnr;
2493
2494                 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2495                         drbd_send_sync_param(peer_device);
2496         }
2497
2498         goto done;
2499
2500  fail:
2501         mutex_unlock(&connection->resource->conf_update);
2502         mutex_unlock(&connection->data.mutex);
2503         free_crypto(&crypto);
2504         kfree(new_net_conf);
2505  done:
2506         conn_reconfig_done(connection);
2507  out:
2508         mutex_unlock(&adm_ctx.resource->adm_mutex);
2509  finish:
2510         drbd_adm_finish(&adm_ctx, info, retcode);
2511         return 0;
2512 }
2513
2514 static void connection_to_info(struct connection_info *info,
2515                                struct drbd_connection *connection)
2516 {
2517         info->conn_connection_state = connection->cstate;
2518         info->conn_role = conn_highest_peer(connection);
2519 }
2520
2521 static void peer_device_to_info(struct peer_device_info *info,
2522                                 struct drbd_peer_device *peer_device)
2523 {
2524         struct drbd_device *device = peer_device->device;
2525
2526         info->peer_repl_state =
2527                 max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
2528         info->peer_disk_state = device->state.pdsk;
2529         info->peer_resync_susp_user = device->state.user_isp;
2530         info->peer_resync_susp_peer = device->state.peer_isp;
2531         info->peer_resync_susp_dependency = device->state.aftr_isp;
2532 }
2533
2534 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2535 {
2536         struct connection_info connection_info;
2537         enum drbd_notification_type flags;
2538         unsigned int peer_devices = 0;
2539         struct drbd_config_context adm_ctx;
2540         struct drbd_peer_device *peer_device;
2541         struct net_conf *old_net_conf, *new_net_conf = NULL;
2542         struct crypto crypto = { };
2543         struct drbd_resource *resource;
2544         struct drbd_connection *connection;
2545         enum drbd_ret_code retcode;
2546         int i;
2547         int err;
2548
2549         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2550
2551         if (!adm_ctx.reply_skb)
2552                 return retcode;
2553         if (retcode != NO_ERROR)
2554                 goto out;
2555         if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2556                 drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2557                 retcode = ERR_INVALID_REQUEST;
2558                 goto out;
2559         }
2560
2561         /* No need for _rcu here. All reconfiguration is
2562          * strictly serialized on genl_lock(). We are protected against
2563          * concurrent reconfiguration/addition/deletion */
2564         for_each_resource(resource, &drbd_resources) {
2565                 for_each_connection(connection, resource) {
2566                         if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2567                             !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2568                                     connection->my_addr_len)) {
2569                                 retcode = ERR_LOCAL_ADDR;
2570                                 goto out;
2571                         }
2572
2573                         if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2574                             !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2575                                     connection->peer_addr_len)) {
2576                                 retcode = ERR_PEER_ADDR;
2577                                 goto out;
2578                         }
2579                 }
2580         }
2581
2582         mutex_lock(&adm_ctx.resource->adm_mutex);
2583         connection = first_connection(adm_ctx.resource);
2584         conn_reconfig_start(connection);
2585
2586         if (connection->cstate > C_STANDALONE) {
2587                 retcode = ERR_NET_CONFIGURED;
2588                 goto fail;
2589         }
2590
2591         /* allocation not in the IO path, drbdsetup / netlink process context */
2592         new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2593         if (!new_net_conf) {
2594                 retcode = ERR_NOMEM;
2595                 goto fail;
2596         }
2597
2598         set_net_conf_defaults(new_net_conf);
2599
2600         err = net_conf_from_attrs(new_net_conf, info);
2601         if (err && err != -ENOMSG) {
2602                 retcode = ERR_MANDATORY_TAG;
2603                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2604                 goto fail;
2605         }
2606
2607         retcode = check_net_options(connection, new_net_conf);
2608         if (retcode != NO_ERROR)
2609                 goto fail;
2610
2611         retcode = alloc_crypto(&crypto, new_net_conf);
2612         if (retcode != NO_ERROR)
2613                 goto fail;
2614
2615         ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2616
2617         drbd_flush_workqueue(&connection->sender_work);
2618
2619         mutex_lock(&adm_ctx.resource->conf_update);
2620         old_net_conf = connection->net_conf;
2621         if (old_net_conf) {
2622                 retcode = ERR_NET_CONFIGURED;
2623                 mutex_unlock(&adm_ctx.resource->conf_update);
2624                 goto fail;
2625         }
2626         rcu_assign_pointer(connection->net_conf, new_net_conf);
2627
2628         conn_free_crypto(connection);
2629         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2630         connection->integrity_tfm = crypto.integrity_tfm;
2631         connection->csums_tfm = crypto.csums_tfm;
2632         connection->verify_tfm = crypto.verify_tfm;
2633
2634         connection->my_addr_len = nla_len(adm_ctx.my_addr);
2635         memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2636         connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2637         memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2638
2639         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2640                 peer_devices++;
2641         }
2642
2643         connection_to_info(&connection_info, connection);
2644         flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
2645         mutex_lock(&notification_mutex);
2646         notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
2647         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2648                 struct peer_device_info peer_device_info;
2649
2650                 peer_device_to_info(&peer_device_info, peer_device);
2651                 flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
2652                 notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
2653         }
2654         mutex_unlock(&notification_mutex);
2655         mutex_unlock(&adm_ctx.resource->conf_update);
2656
2657         rcu_read_lock();
2658         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2659                 struct drbd_device *device = peer_device->device;
2660                 device->send_cnt = 0;
2661                 device->recv_cnt = 0;
2662         }
2663         rcu_read_unlock();
2664
2665         retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2666
2667         conn_reconfig_done(connection);
2668         mutex_unlock(&adm_ctx.resource->adm_mutex);
2669         drbd_adm_finish(&adm_ctx, info, retcode);
2670         return 0;
2671
2672 fail:
2673         free_crypto(&crypto);
2674         kfree(new_net_conf);
2675
2676         conn_reconfig_done(connection);
2677         mutex_unlock(&adm_ctx.resource->adm_mutex);
2678 out:
2679         drbd_adm_finish(&adm_ctx, info, retcode);
2680         return 0;
2681 }
2682
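     /* Try to bring the connection down gracefully.  Depending on the state
      * engine's verdict we may have to retry with the peer disk (or our own
      * disk) marked D_OUTDATED.  On success, wait for the receiver thread to
      * exit and force the final transition to C_STANDALONE. */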
2683 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2684 {
2685         enum drbd_state_rv rv;
2686
2687         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2688                         force ? CS_HARD : 0);
2689
2690         switch (rv) {
2691         case SS_NOTHING_TO_DO:
2692                 break;
2693         case SS_ALREADY_STANDALONE:
2694                 return SS_SUCCESS;
2695         case SS_PRIMARY_NOP:
2696                 /* Our state checking code wants to see the peer outdated. */
2697                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2698
2699                 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2700                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2701
2702                 break;
2703         case SS_CW_FAILED_BY_PEER:
2704                 /* The peer probably wants to see us outdated. */
2705                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2706                                                         disk, D_OUTDATED), 0);
2707                 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2708                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2709                                         CS_HARD);
2710                 }
2711                 break;
2712         default:;
2713                 /* no special handling necessary */
2714         }
2715
2716         if (rv >= SS_SUCCESS) {
2717                 enum drbd_state_rv rv2;
2718                 /* No one else can reconfigure the network while I am here.
2719                  * The state handling only uses drbd_thread_stop_nowait(),
2720          * we really want to wait here until the receiver thread has exited.
2721                  */
2722                 drbd_thread_stop(&connection->receiver);
2723
2724                 /* Race breaker.  This additional state change request may be
2725                  * necessary, if this was a forced disconnect during a receiver
2726                  * restart.  We may have "killed" the receiver thread just
2727                  * after drbd_receiver() returned.  Typically, we should be
2728                  * C_STANDALONE already, now, and this becomes a no-op.
2729                  */
2730                 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2731                                 CS_VERBOSE | CS_HARD);
2732                 if (rv2 < SS_SUCCESS)
2733                         drbd_err(connection,
2734                                 "unexpected rv2=%d in conn_try_disconnect()\n",
2735                                 rv2);
2736                 /* Unlike in DRBD 9, the state engine has generated
2737                  * NOTIFY_DESTROY events before clearing connection->net_conf. */
2738         }
2739         return rv;
2740 }
2741
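     /* Handler for the "disconnect" netlink command: parse the optional
      * disconnect_parms and let conn_try_disconnect() do the work. */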
2742 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2743 {
2744         struct drbd_config_context adm_ctx;
2745         struct disconnect_parms parms;
2746         struct drbd_connection *connection;
2747         enum drbd_state_rv rv;
2748         enum drbd_ret_code retcode;
2749         int err;
2750
2751         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2752         if (!adm_ctx.reply_skb)
2753                 return retcode;
2754         if (retcode != NO_ERROR)
2755                 goto fail;
2756
2757         connection = adm_ctx.connection;
2758         memset(&parms, 0, sizeof(parms));
2759         if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2760                 err = disconnect_parms_from_attrs(&parms, info);
2761                 if (err) {
2762                         retcode = ERR_MANDATORY_TAG;
2763                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2764                         goto fail;
2765                 }
2766         }
2767
2768         mutex_lock(&adm_ctx.resource->adm_mutex);
2769         rv = conn_try_disconnect(connection, parms.force_disconnect);
2770         if (rv < SS_SUCCESS)
2771                 retcode = rv;  /* FIXME: Type mismatch. */
2772         else
2773                 retcode = NO_ERROR;
2774         mutex_unlock(&adm_ctx.resource->adm_mutex);
2775  fail:
2776         drbd_adm_finish(&adm_ctx, info, retcode);
2777         return 0;
2778 }
2779
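     /* After an online grow, decide which side resynchronizes the new area:
      * the current Primary if the roles differ, otherwise the side holding
      * RESOLVE_CONFLICTS becomes sync source; the other side waits for the
      * sync UUID as target. */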
2780 void resync_after_online_grow(struct drbd_device *device)
2781 {
2782         int iass; /* I am sync source */
2783
2784         drbd_info(device, "Resync of new storage after online grow\n");
2785         if (device->state.role != device->state.peer)
2786                 iass = (device->state.role == R_PRIMARY);
2787         else
2788                 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2789
2790         if (iass)
2791                 drbd_start_resync(device, C_SYNC_SOURCE);
2792         else
2793                 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2794 }
2795
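     /* Handler for the "resize" netlink command: validate the requested size
      * and activity log layout, update disk_conf->disk_size if it changed,
      * let drbd_determine_dev_size() apply the change, and, when connected,
      * send the new UUIDs and sizes to the peer. */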
2796 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2797 {
2798         struct drbd_config_context adm_ctx;
2799         struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2800         struct resize_parms rs;
2801         struct drbd_device *device;
2802         enum drbd_ret_code retcode;
2803         enum determine_dev_size dd;
2804         bool change_al_layout = false;
2805         enum dds_flags ddsf;
2806         sector_t u_size;
2807         int err;
2808
2809         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2810         if (!adm_ctx.reply_skb)
2811                 return retcode;
2812         if (retcode != NO_ERROR)
2813                 goto finish;
2814
2815         mutex_lock(&adm_ctx.resource->adm_mutex);
2816         device = adm_ctx.device;
2817         if (!get_ldev(device)) {
2818                 retcode = ERR_NO_DISK;
2819                 goto fail;
2820         }
2821
2822         memset(&rs, 0, sizeof(struct resize_parms));
2823         rs.al_stripes = device->ldev->md.al_stripes;
2824         rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2825         if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2826                 err = resize_parms_from_attrs(&rs, info);
2827                 if (err) {
2828                         retcode = ERR_MANDATORY_TAG;
2829                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2830                         goto fail_ldev;
2831                 }
2832         }
2833
2834         if (device->state.conn > C_CONNECTED) {
2835                 retcode = ERR_RESIZE_RESYNC;
2836                 goto fail_ldev;
2837         }
2838
2839         if (device->state.role == R_SECONDARY &&
2840             device->state.peer == R_SECONDARY) {
2841                 retcode = ERR_NO_PRIMARY;
2842                 goto fail_ldev;
2843         }
2844
2845         if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2846                 retcode = ERR_NEED_APV_93;
2847                 goto fail_ldev;
2848         }
2849
2850         rcu_read_lock();
2851         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2852         rcu_read_unlock();
2853         if (u_size != (sector_t)rs.resize_size) {
2854                 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2855                 if (!new_disk_conf) {
2856                         retcode = ERR_NOMEM;
2857                         goto fail_ldev;
2858                 }
2859         }
2860
2861         if (device->ldev->md.al_stripes != rs.al_stripes ||
2862             device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2863                 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2864
2865                 if (al_size_k > (16 * 1024 * 1024)) {
2866                         retcode = ERR_MD_LAYOUT_TOO_BIG;
2867                         goto fail_ldev;
2868                 }
2869
2870                 if (al_size_k < MD_32kB_SECT/2) {
2871                         retcode = ERR_MD_LAYOUT_TOO_SMALL;
2872                         goto fail_ldev;
2873                 }
2874
2875                 if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2876                         retcode = ERR_MD_LAYOUT_CONNECTED;
2877                         goto fail_ldev;
2878                 }
2879
2880                 change_al_layout = true;
2881         }
2882
2883         if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2884                 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2885
2886         if (new_disk_conf) {
2887                 mutex_lock(&device->resource->conf_update);
2888                 old_disk_conf = device->ldev->disk_conf;
2889                 *new_disk_conf = *old_disk_conf;
2890                 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2891                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2892                 mutex_unlock(&device->resource->conf_update);
2893                 synchronize_rcu();
2894                 kfree(old_disk_conf);
2895                 new_disk_conf = NULL;
2896         }
2897
2898         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2899         dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2900         drbd_md_sync(device);
2901         put_ldev(device);
2902         if (dd == DS_ERROR) {
2903                 retcode = ERR_NOMEM_BITMAP;
2904                 goto fail;
2905         } else if (dd == DS_ERROR_SPACE_MD) {
2906                 retcode = ERR_MD_LAYOUT_NO_FIT;
2907                 goto fail;
2908         } else if (dd == DS_ERROR_SHRINK) {
2909                 retcode = ERR_IMPLICIT_SHRINK;
2910                 goto fail;
2911         }
2912
2913         if (device->state.conn == C_CONNECTED) {
2914                 if (dd == DS_GREW)
2915                         set_bit(RESIZE_PENDING, &device->flags);
2916
2917                 drbd_send_uuids(first_peer_device(device));
2918                 drbd_send_sizes(first_peer_device(device), 1, ddsf);
2919         }
2920
2921  fail:
2922         mutex_unlock(&adm_ctx.resource->adm_mutex);
2923  finish:
2924         drbd_adm_finish(&adm_ctx, info, retcode);
2925         return 0;
2926
2927  fail_ldev:
2928         put_ldev(device);
2929         kfree(new_disk_conf);
2930         goto fail;
2931 }
2932
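     /* Handler for the "resource-options" netlink command: parse the new
      * res_opts (optionally starting from defaults) and apply them via
      * set_resource_options(). */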
2933 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2934 {
2935         struct drbd_config_context adm_ctx;
2936         enum drbd_ret_code retcode;
2937         struct res_opts res_opts;
2938         int err;
2939
2940         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2941         if (!adm_ctx.reply_skb)
2942                 return retcode;
2943         if (retcode != NO_ERROR)
2944                 goto fail;
2945
2946         res_opts = adm_ctx.resource->res_opts;
2947         if (should_set_defaults(info))
2948                 set_res_opts_defaults(&res_opts);
2949
2950         err = res_opts_from_attrs(&res_opts, info);
2951         if (err && err != -ENOMSG) {
2952                 retcode = ERR_MANDATORY_TAG;
2953                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2954                 goto fail;
2955         }
2956
2957         mutex_lock(&adm_ctx.resource->adm_mutex);
2958         err = set_resource_options(adm_ctx.resource, &res_opts);
2959         if (err) {
2960                 retcode = ERR_INVALID_REQUEST;
2961                 if (err == -ENOMEM)
2962                         retcode = ERR_NOMEM;
2963         }
2964         mutex_unlock(&adm_ctx.resource->adm_mutex);
2965
2966 fail:
2967         drbd_adm_finish(&adm_ctx, info, retcode);
2968         return 0;
2969 }
2970
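     /* Handler for the "invalidate" netlink command: declare the local data
      * inconsistent and request a full resync from the peer, or just set all
      * bitmap bits if we are StandAlone Secondary. */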
2971 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2972 {
2973         struct drbd_config_context adm_ctx;
2974         struct drbd_device *device;
2975         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2976
2977         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2978         if (!adm_ctx.reply_skb)
2979                 return retcode;
2980         if (retcode != NO_ERROR)
2981                 goto out;
2982
2983         device = adm_ctx.device;
2984         if (!get_ldev(device)) {
2985                 retcode = ERR_NO_DISK;
2986                 goto out;
2987         }
2988
2989         mutex_lock(&adm_ctx.resource->adm_mutex);
2990
2991         /* If there is still bitmap IO pending, probably because a previous
2992          * resync has just finished, wait for it before requesting a new resync.
2993          * Also wait for its after_state_ch(). */
2994         drbd_suspend_io(device);
2995         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2996         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2997
2998         /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2999          * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
3000          * try to start a resync handshake as sync target for full sync.
3001          */
3002         if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
3003                 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
3004                 if (retcode >= SS_SUCCESS) {
3005                         if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
3006                                 "set_n_write from invalidate", BM_LOCKED_MASK))
3007                                 retcode = ERR_IO_MD_DISK;
3008                 }
3009         } else
3010                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
3011         drbd_resume_io(device);
3012         mutex_unlock(&adm_ctx.resource->adm_mutex);
3013         put_ldev(device);
3014 out:
3015         drbd_adm_finish(&adm_ctx, info, retcode);
3016         return 0;
3017 }
3018
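     /* Common helper for netlink commands that boil down to a single
      * drbd_request_state() call on one minor. */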
3019 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
3020                 union drbd_state mask, union drbd_state val)
3021 {
3022         struct drbd_config_context adm_ctx;
3023         enum drbd_ret_code retcode;
3024
3025         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3026         if (!adm_ctx.reply_skb)
3027                 return retcode;
3028         if (retcode != NO_ERROR)
3029                 goto out;
3030
3031         mutex_lock(&adm_ctx.resource->adm_mutex);
3032         retcode = drbd_request_state(adm_ctx.device, mask, val);
3033         mutex_unlock(&adm_ctx.resource->adm_mutex);
3034 out:
3035         drbd_adm_finish(&adm_ctx, info, retcode);
3036         return 0;
3037 }
3038
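     /* Bitmap IO worker: set all bits (full sync) and additionally suspend
      * activity log updates. */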
3039 static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
3040 {
3041         int rv;
3042
3043         rv = drbd_bmio_set_n_write(device);
3044         drbd_suspend_al(device);
3045         return rv;
3046 }
3047
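     /* Handler for the "invalidate-peer" netlink command: declare the peer's
      * data inconsistent and request a full resync towards the peer, or just
      * set all bitmap bits if we are StandAlone Primary. */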
3048 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
3049 {
3050         struct drbd_config_context adm_ctx;
3051         int retcode; /* drbd_ret_code, drbd_state_rv */
3052         struct drbd_device *device;
3053
3054         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3055         if (!adm_ctx.reply_skb)
3056                 return retcode;
3057         if (retcode != NO_ERROR)
3058                 goto out;
3059
3060         device = adm_ctx.device;
3061         if (!get_ldev(device)) {
3062                 retcode = ERR_NO_DISK;
3063                 goto out;
3064         }
3065
3066         mutex_lock(&adm_ctx.resource->adm_mutex);
3067
3068         /* If there is still bitmap IO pending, probably because a previous
3069          * resync has just finished, wait for it before requesting a new resync.
3070          * Also wait for its after_state_ch(). */
3071         drbd_suspend_io(device);
3072         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3073         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
3074
3075         /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
3076          * in the bitmap.  Otherwise, try to start a resync handshake
3077          * as sync source for full sync.
3078          */
3079         if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
3080                 /* The peer will get a resync upon connect anyways. Just make that
3081                    into a full resync. */
3082                 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
3083                 if (retcode >= SS_SUCCESS) {
3084                         if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
3085                                 "set_n_write from invalidate_peer",
3086                                 BM_LOCKED_SET_ALLOWED))
3087                                 retcode = ERR_IO_MD_DISK;
3088                 }
3089         } else
3090                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
3091         drbd_resume_io(device);
3092         mutex_unlock(&adm_ctx.resource->adm_mutex);
3093         put_ldev(device);
3094 out:
3095         drbd_adm_finish(&adm_ctx, info, retcode);
3096         return 0;
3097 }
3098
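     /* Handler for the "pause-sync" netlink command: set the user resync
      * suspend flag; report ERR_PAUSE_IS_SET if it was already set. */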
3099 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
3100 {
3101         struct drbd_config_context adm_ctx;
3102         enum drbd_ret_code retcode;
3103
3104         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3105         if (!adm_ctx.reply_skb)
3106                 return retcode;
3107         if (retcode != NO_ERROR)
3108                 goto out;
3109
3110         mutex_lock(&adm_ctx.resource->adm_mutex);
3111         if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
3112                 retcode = ERR_PAUSE_IS_SET;
3113         mutex_unlock(&adm_ctx.resource->adm_mutex);
3114 out:
3115         drbd_adm_finish(&adm_ctx, info, retcode);
3116         return 0;
3117 }
3118
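     /* Handler for the "resume-sync" netlink command: clear the user resync
      * suspend flag and explain why the resync may still be paused. */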
3119 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
3120 {
3121         struct drbd_config_context adm_ctx;
3122         union drbd_dev_state s;
3123         enum drbd_ret_code retcode;
3124
3125         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3126         if (!adm_ctx.reply_skb)
3127                 return retcode;
3128         if (retcode != NO_ERROR)
3129                 goto out;
3130
3131         mutex_lock(&adm_ctx.resource->adm_mutex);
3132         if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
3133                 s = adm_ctx.device->state;
3134                 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
3135                         retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
3136                                   s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
3137                 } else {
3138                         retcode = ERR_PAUSE_IS_CLEAR;
3139                 }
3140         }
3141         mutex_unlock(&adm_ctx.resource->adm_mutex);
3142 out:
3143         drbd_adm_finish(&adm_ctx, info, retcode);
3144         return 0;
3145 }
3146
3147 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
3148 {
3149         return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
3150 }
3151
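     /* Handler for the "resume-io" netlink command: clear all suspend
      * reasons.  If IO was frozen after losing the peer (NEW_CUR_UUID set),
      * make the new data generation effective first, then fail or restart
      * the requests that were frozen in the transfer log. */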
3152 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
3153 {
3154         struct drbd_config_context adm_ctx;
3155         struct drbd_device *device;
3156         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3157
3158         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3159         if (!adm_ctx.reply_skb)
3160                 return retcode;
3161         if (retcode != NO_ERROR)
3162                 goto out;
3163
3164         mutex_lock(&adm_ctx.resource->adm_mutex);
3165         device = adm_ctx.device;
3166         if (test_bit(NEW_CUR_UUID, &device->flags)) {
3167                 if (get_ldev_if_state(device, D_ATTACHING)) {
3168                         drbd_uuid_new_current(device);
3169                         put_ldev(device);
3170                 } else {
3171                         /* This is effectively a multi-stage "forced down".
3172                          * The NEW_CUR_UUID bit is supposedly only set if we
3173                          * lost the replication connection, and are configured
3174                          * to freeze IO and wait for some fence-peer handler.
3175                          * So we still don't have a replication connection.
3176                          * And now we don't have a local disk either.  After
3177                          * resume, we will fail all pending and new IO, because
3178                          * we don't have any data anymore.  Which means we will
3179                          * eventually be able to terminate all users of this
3180                          * device, and then take it down.  By bumping the
3181                          * "effective" data uuid, we make sure that you really
3182                          * need to tear down before you reconfigure: we will
3183                          * then refuse to re-connect or re-attach (because no
3184                          * matching real data uuid exists).
3185                          */
3186                         u64 val;
3187                         get_random_bytes(&val, sizeof(u64));
3188                         drbd_set_ed_uuid(device, val);
3189                         drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
3190                 }
3191                 clear_bit(NEW_CUR_UUID, &device->flags);
3192         }
3193         drbd_suspend_io(device);
3194         retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
3195         if (retcode == SS_SUCCESS) {
3196                 if (device->state.conn < C_CONNECTED)
3197                         tl_clear(first_peer_device(device)->connection);
3198                 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
3199                         tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
3200         }
3201         drbd_resume_io(device);
3202         mutex_unlock(&adm_ctx.resource->adm_mutex);
3203 out:
3204         drbd_adm_finish(&adm_ctx, info, retcode);
3205         return 0;
3206 }
3207
3208 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
3209 {
3210         return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
3211 }
3212
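     /* Emit the DRBD_NLA_CFG_CONTEXT nest (resource name, volume number and,
      * if available, the connection's local and peer addresses) into @skb. */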
3213 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
3214                                     struct drbd_resource *resource,
3215                                     struct drbd_connection *connection,
3216                                     struct drbd_device *device)
3217 {
3218         struct nlattr *nla;
3219         nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
3220         if (!nla)
3221                 goto nla_put_failure;
3222         if (device &&
3223             nla_put_u32(skb, T_ctx_volume, device->vnr))
3224                 goto nla_put_failure;
3225         if (nla_put_string(skb, T_ctx_resource_name, resource->name))
3226                 goto nla_put_failure;
3227         if (connection) {
3228                 if (connection->my_addr_len &&
3229                     nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
3230                         goto nla_put_failure;
3231                 if (connection->peer_addr_len &&
3232                     nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
3233                         goto nla_put_failure;
3234         }
3235         nla_nest_end(skb, nla);
3236         return 0;
3237
3238 nla_put_failure:
3239         if (nla)
3240                 nla_nest_cancel(skb, nla);
3241         return -EMSGSIZE;
3242 }
3243
3244 /*
3245  * The generic netlink dump callbacks are called outside the genl_lock(), so
3246  * they cannot use the simple attribute parsing code which uses global
3247  * attribute tables.
3248  */
3249 static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
3250 {
3251         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3252         const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3253         struct nlattr *nla;
3254
3255         nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
3256                        DRBD_NLA_CFG_CONTEXT);
3257         if (!nla)
3258                 return NULL;
3259         return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
3260 }
3261
3262 static void resource_to_info(struct resource_info *, struct drbd_resource *);
3263
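     /* Netlink dump callback for DRBD_ADM_GET_RESOURCES: emit one resource
      * per invocation, remembering the last resource dumped in cb->args[0]. */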
3264 int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
3265 {
3266         struct drbd_genlmsghdr *dh;
3267         struct drbd_resource *resource;
3268         struct resource_info resource_info;
3269         struct resource_statistics resource_statistics;
3270         int err;
3271
3272         rcu_read_lock();
3273         if (cb->args[0]) {
3274                 for_each_resource_rcu(resource, &drbd_resources)
3275                         if (resource == (struct drbd_resource *)cb->args[0])
3276                                 goto found_resource;
3277                 err = 0;  /* resource was probably deleted */
3278                 goto out;
3279         }
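             /* Point at the list head; list_for_each_entry_continue_rcu()
              * below then starts with the first real resource. */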
3280         resource = list_entry(&drbd_resources,
3281                               struct drbd_resource, resources);
3282
3283 found_resource:
3284         list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
3285                 goto put_result;
3286         }
3287         err = 0;
3288         goto out;
3289
3290 put_result:
3291         dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3292                         cb->nlh->nlmsg_seq, &drbd_genl_family,
3293                         NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
3294         err = -ENOMEM;
3295         if (!dh)
3296                 goto out;
3297         dh->minor = -1U;
3298         dh->ret_code = NO_ERROR;
3299         err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
3300         if (err)
3301                 goto out;
3302         err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
3303         if (err)
3304                 goto out;
3305         resource_to_info(&resource_info, resource);
3306         err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
3307         if (err)
3308                 goto out;
3309         resource_statistics.res_stat_write_ordering = resource->write_ordering;
3310         err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
3311         if (err)
3312                 goto out;
3313         cb->args[0] = (long)resource;
3314         genlmsg_end(skb, dh);
3315         err = 0;
3316
3317 out:
3318         rcu_read_unlock();
3319         if (err)
3320                 return err;
3321         return skb->len;
3322 }
3323
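     /* Collect the per-device statistics (IO counters, UUID history,
      * congestion state of the backing device) reported in dump replies. */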
3324 static void device_to_statistics(struct device_statistics *s,
3325                                  struct drbd_device *device)
3326 {
3327         memset(s, 0, sizeof(*s));
3328         s->dev_upper_blocked = !may_inc_ap_bio(device);
3329         if (get_ldev(device)) {
3330                 struct drbd_md *md = &device->ldev->md;
3331                 u64 *history_uuids = (u64 *)s->history_uuids;
3332                 struct request_queue *q;
3333                 int n;
3334
3335                 spin_lock_irq(&md->uuid_lock);
3336                 s->dev_current_uuid = md->uuid[UI_CURRENT];
3337                 BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
3338                 for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++)
3339                         history_uuids[n] = md->uuid[UI_HISTORY_START + n];
3340                 for (; n < HISTORY_UUIDS; n++)
3341                         history_uuids[n] = 0;
3342                 s->history_uuids_len = HISTORY_UUIDS;
3343                 spin_unlock_irq(&md->uuid_lock);
3344
3345                 s->dev_disk_flags = md->flags;
3346                 q = bdev_get_queue(device->ldev->backing_bdev);
3347                 s->dev_lower_blocked =
3348                         bdi_congested(&q->backing_dev_info,
3349                                       (1 << WB_async_congested) |
3350                                       (1 << WB_sync_congested));
3351                 put_ldev(device);
3352         }
3353         s->dev_size = drbd_get_capacity(device->this_bdev);
3354         s->dev_read = device->read_cnt;
3355         s->dev_write = device->writ_cnt;
3356         s->dev_al_writes = device->al_writ_cnt;
3357         s->dev_bm_writes = device->bm_writ_cnt;
3358         s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
3359         s->dev_lower_pending = atomic_read(&device->local_cnt);
3360         s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
3361         s->dev_exposed_data_uuid = device->ed_uuid;
3362 }
3363
3364 static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
3365 {
3366         if (cb->args[0]) {
3367                 struct drbd_resource *resource =
3368                         (struct drbd_resource *)cb->args[0];
3369                 kref_put(&resource->kref, drbd_destroy_resource);
3370         }
3371
3372         return 0;
3373 }
3374
3375 int drbd_adm_dump_devices_done(struct netlink_callback *cb)
     {
3376         return put_resource_in_arg0(cb, 7);
3377 }
3378
3379 static void device_to_info(struct device_info *, struct drbd_device *);
3380
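     /* Netlink dump callback for DRBD_ADM_GET_DEVICES: iterate over all
      * devices, or only those of the resource given in the config context
      * filter, emitting one device per invocation; the next minor to look
      * at is kept in cb->args[1]. */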
3381 int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
3382 {
3383         struct nlattr *resource_filter;
3384         struct drbd_resource *resource;
3385         struct drbd_device *uninitialized_var(device);
3386         int minor, err, retcode;
3387         struct drbd_genlmsghdr *dh;
3388         struct device_info device_info;
3389         struct device_statistics device_statistics;
3390         struct idr *idr_to_search;
3391
3392         resource = (struct drbd_resource *)cb->args[0];
3393         if (!cb->args[0] && !cb->args[1]) {
3394                 resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
3395                 if (resource_filter) {
3396                         retcode = ERR_RES_NOT_KNOWN;
3397                         resource = drbd_find_resource(nla_data(resource_filter));
3398                         if (!resource)
3399                                 goto put_result;
3400                         cb->args[0] = (long)resource;
3401                 }
3402         }
3403
3404         rcu_read_lock();
3405         minor = cb->args[1];
3406         idr_to_search = resource ? &resource->devices : &drbd_devices;
3407         device = idr_get_next(idr_to_search, &minor);
3408         if (!device) {
3409                 err = 0;
3410                 goto out;
3411         }
3412         idr_for_each_entry_continue(idr_to_search, device, minor) {
3413                 retcode = NO_ERROR;
3414                 goto put_result;  /* only one iteration */
3415         }
3416         err = 0;
3417         goto out;  /* no more devices */
3418
3419 put_result:
3420         dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3421                         cb->nlh->nlmsg_seq, &drbd_genl_family,
3422                         NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
3423         err = -ENOMEM;
3424         if (!dh)
3425                 goto out;
3426         dh->ret_code = retcode;
3427         dh->minor = -1U;
3428         if (retcode == NO_ERROR) {
3429                 dh->minor = device->minor;
3430                 err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
3431                 if (err)
3432                         goto out;
3433                 if (get_ldev(device)) {
3434                         struct disk_conf *disk_conf =
3435                                 rcu_dereference(device->ldev->disk_conf);
3436
3437                         err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
3438                         put_ldev(device);
3439                         if (err)
3440                                 goto out;
3441                 }
3442                 device_to_info(&device_info, device);
3443                 err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
3444                 if (err)
3445                         goto out;
3446
3447                 device_to_statistics(&device_statistics, device);
3448                 err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
3449                 if (err)
3450                         goto out;
3451                 cb->args[1] = minor + 1;
3452         }
3453         genlmsg_end(skb, dh);
3454         err = 0;
3455
3456 out:
3457         rcu_read_unlock();
3458         if (err)
3459                 return err;
3460         return skb->len;
3461 }
3462
3463 int drbd_adm_dump_connections_done(struct netlink_callback *cb)
3464 {
3465         return put_resource_in_arg0(cb, 6);
3466 }
3467
3468 enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
3469
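     /* Netlink dump callback for DRBD_ADM_GET_CONNECTIONS: walk all
      * resources (or only the filtered one) and emit one configured
      * connection per invocation; iterator state lives in cb->args[0..2]. */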
3470 int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
3471 {
3472         struct nlattr *resource_filter;
3473         struct drbd_resource *resource = NULL, *next_resource;
3474         struct drbd_connection *uninitialized_var(connection);
3475         int err = 0, retcode;
3476         struct drbd_genlmsghdr *dh;
3477         struct connection_info connection_info;
3478         struct connection_statistics connection_statistics;
3479
3480         rcu_read_lock();
3481         resource = (struct drbd_resource *)cb->args[0];
3482         if (!cb->args[0]) {
3483                 resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
3484                 if (resource_filter) {
3485                         retcode = ERR_RES_NOT_KNOWN;
3486                         resource = drbd_find_resource(nla_data(resource_filter));
3487                         if (!resource)
3488                                 goto put_result;
3489                         cb->args[0] = (long)resource;
3490                         cb->args[1] = SINGLE_RESOURCE;
3491                 }
3492         }
3493         if (!resource) {
3494                 if (list_empty(&drbd_resources))
3495                         goto out;
3496                 resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
3497                 kref_get(&resource->kref);
3498                 cb->args[0] = (long)resource;
3499                 cb->args[1] = ITERATE_RESOURCES;
3500         }
3501
3502     next_resource:
3503         rcu_read_unlock();
3504         mutex_lock(&resource->conf_update);
3505         rcu_read_lock();
3506         if (cb->args[2]) {
3507                 for_each_connection_rcu(connection, resource)
3508                         if (connection == (struct drbd_connection *)cb->args[2])
3509                                 goto found_connection;
3510                 /* connection was probably deleted */
3511                 goto no_more_connections;
3512         }
3513         connection = list_entry(&resource->connections, struct drbd_connection, connections);
3514
3515 found_connection:
3516         list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
3517                 if (!has_net_conf(connection))
3518                         continue;
3519                 retcode = NO_ERROR;
3520                 goto put_result;  /* only one iteration */
3521         }
3522
3523 no_more_connections:
3524         if (cb->args[1] == ITERATE_RESOURCES) {
3525                 for_each_resource_rcu(next_resource, &drbd_resources) {
3526                         if (next_resource == resource)
3527                                 goto found_resource;
3528                 }
3529                 /* resource was probably deleted */
3530         }
3531         goto out;
3532
3533 found_resource:
3534         list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
3535                 mutex_unlock(&resource->conf_update);
3536                 kref_put(&resource->kref, drbd_destroy_resource);
3537                 resource = next_resource;
3538                 kref_get(&resource->kref);
3539                 cb->args[0] = (long)resource;
3540                 cb->args[2] = 0;
3541                 goto next_resource;
3542         }
3543         goto out;  /* no more resources */
3544
3545 put_result:
3546         dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3547                         cb->nlh->nlmsg_seq, &drbd_genl_family,
3548                         NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
3549         err = -ENOMEM;
3550         if (!dh)
3551                 goto out;
3552         dh->ret_code = retcode;
3553         dh->minor = -1U;
3554         if (retcode == NO_ERROR) {
3555                 struct net_conf *net_conf;
3556
3557                 err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
3558                 if (err)
3559                         goto out;
3560                 net_conf = rcu_dereference(connection->net_conf);
3561                 if (net_conf) {
3562                         err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
3563                         if (err)
3564                                 goto out;
3565                 }
3566                 connection_to_info(&connection_info, connection);
3567                 err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
3568                 if (err)
3569                         goto out;
3570                 connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
3571                 err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
3572                 if (err)
3573                         goto out;
3574                 cb->args[2] = (long)connection;
3575         }
3576         genlmsg_end(skb, dh);
3577         err = 0;
3578
3579 out:
3580         rcu_read_unlock();
3581         if (resource)
3582                 mutex_unlock(&resource->conf_update);
3583         if (err)
3584                 return err;
3585         return skb->len;
3586 }
3587
3588 enum mdf_peer_flag {
3589         MDF_PEER_CONNECTED =    1 << 0,
3590         MDF_PEER_OUTDATED =     1 << 1,
3591         MDF_PEER_FENCING =      1 << 2,
3592         MDF_PEER_FULL_SYNC =    1 << 3,
3593 };
3594
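     /* Collect the per-peer-device statistics (pending and unacked requests,
      * out-of-sync and resync-failed amounts, bitmap UUID and peer flags)
      * reported in dump replies. */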
3595 static void peer_device_to_statistics(struct peer_device_statistics *s,
3596                                       struct drbd_peer_device *peer_device)
3597 {
3598         struct drbd_device *device = peer_device->device;
3599
3600         memset(s, 0, sizeof(*s));
3601         s->peer_dev_received = device->recv_cnt;
3602         s->peer_dev_sent = device->send_cnt;
3603         s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
3604                               atomic_read(&device->rs_pending_cnt);
3605         s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
3606         s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
3607         s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
3608         if (get_ldev(device)) {
3609                 struct drbd_md *md = &device->ldev->md;
3610
3611                 spin_lock_irq(&md->uuid_lock);
3612                 s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
3613                 spin_unlock_irq(&md->uuid_lock);
3614                 s->peer_dev_flags =
3615                         (drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
3616                                 MDF_PEER_CONNECTED : 0) +
3617                         (drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
3618                          !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
3619                                 MDF_PEER_OUTDATED : 0) +
3620                         /* FIXME: MDF_PEER_FENCING? */
3621                         (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
3622                                 MDF_PEER_FULL_SYNC : 0);
3623                 put_ldev(device);
3624         }
3625 }
3626
3627 int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
3628 {
3629         return put_resource_in_arg0(cb, 9);
3630 }
3631
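     /* Netlink dump callback for DRBD_ADM_GET_PEER_DEVICES: for each device
      * (optionally restricted by the resource filter) emit one peer device
      * per invocation; cb->args[1] holds the minor, cb->args[2] the last
      * peer device visited. */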
3632 int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
3633 {
3634         struct nlattr *resource_filter;
3635         struct drbd_resource *resource;
3636         struct drbd_device *uninitialized_var(device);
3637         struct drbd_peer_device *peer_device = NULL;
3638         int minor, err, retcode;
3639         struct drbd_genlmsghdr *dh;
3640         struct idr *idr_to_search;
3641
3642         resource = (struct drbd_resource *)cb->args[0];
3643         if (!cb->args[0] && !cb->args[1]) {
3644                 resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
3645                 if (resource_filter) {
3646                         retcode = ERR_RES_NOT_KNOWN;
3647                         resource = drbd_find_resource(nla_data(resource_filter));
3648                         if (!resource)
3649                                 goto put_result;
3650                 }
3651                 cb->args[0] = (long)resource;
3652         }
3653
3654         rcu_read_lock();
3655         minor = cb->args[1];
3656         idr_to_search = resource ? &resource->devices : &drbd_devices;
3657         device = idr_find(idr_to_search, minor);
3658         if (!device) {
3659 next_device:
3660                 minor++;
3661                 cb->args[2] = 0;
3662                 device = idr_get_next(idr_to_search, &minor);
3663                 if (!device) {
3664                         err = 0;
3665                         goto out;
3666                 }
3667         }
3668         if (cb->args[2]) {
3669                 for_each_peer_device(peer_device, device)
3670                         if (peer_device == (struct drbd_peer_device *)cb->args[2])
3671                                 goto found_peer_device;
3672                 /* peer device was probably deleted */
3673                 goto next_device;
3674         }
3675         /* Make peer_device point to the list head (not the first entry). */
3676         peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
3677
3678 found_peer_device:
3679         list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
3680                 if (!has_net_conf(peer_device->connection))
3681                         continue;
3682                 retcode = NO_ERROR;
3683                 goto put_result;  /* only one iteration */
3684         }
3685         goto next_device;
3686
3687 put_result:
3688         dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3689                         cb->nlh->nlmsg_seq, &drbd_genl_family,
3690                         NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
3691         err = -ENOMEM;
3692         if (!dh)
3693                 goto out;
3694         dh->ret_code = retcode;
3695         dh->minor = -1U;
3696         if (retcode == NO_ERROR) {
3697                 struct peer_device_info peer_device_info;
3698                 struct peer_device_statistics peer_device_statistics;
3699
3700                 dh->minor = minor;
3701                 err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
3702                 if (err)
3703                         goto out;
3704                 peer_device_to_info(&peer_device_info, peer_device);
3705                 err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
3706                 if (err)
3707                         goto out;
3708                 peer_device_to_statistics(&peer_device_statistics, peer_device);
3709                 err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
3710                 if (err)
3711                         goto out;
3712                 cb->args[1] = minor;
3713                 cb->args[2] = (long)peer_device;
3714         }
3715         genlmsg_end(skb, dh);
3716         err = 0;
3717
3718 out:
3719         rcu_read_unlock();
3720         if (err)
3721                 return err;
3722         return skb->len;
3723 }
3724 /*
3725  * Return the connection of @resource if @resource has exactly one connection.
3726  */
3727 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
3728 {
3729         struct list_head *connections = &resource->connections;
3730
3731         if (list_empty(connections) || connections->next->next != connections)
3732                 return NULL;
3733         return list_first_entry(&resource->connections, struct drbd_connection, connections);
3734 }
3735
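     /* Build a complete status reply for one device: config context,
      * resource, disk and net options, plus the DRBD_NLA_STATE_INFO nest
      * with state, counters, UUIDs and, for broadcasts, the state change
      * or helper details from @sib. */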
3736 static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
3737                 const struct sib_info *sib)
3738 {
3739         struct drbd_resource *resource = device->resource;
3740         struct state_info *si = NULL; /* for sizeof(si->member); */
3741         struct nlattr *nla;
3742         int got_ldev;
3743         int err = 0;
3744         int exclude_sensitive;
3745
3746         /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
3747          * to.  So we better exclude_sensitive information.
3748          *
3749          * If sib == NULL, this is drbd_adm_get_status, executed synchronously
3750          * in the context of the requesting user process. Exclude sensitive
3751          * information, unless current has superuser.
3752          *
3753          * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
3754          * relies on the current implementation of netlink_dump(), which
3755          * executes the dump callback successively from netlink_recvmsg(),
3756          * always in the context of the receiving process */
3757         exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
3758
3759         got_ldev = get_ldev(device);
3760
3761         /* We still need to add connection name and volume number information.
3762          * Minor number is in drbd_genlmsghdr. */
3763         if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
3764                 goto nla_put_failure;
3765
3766         if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
3767                 goto nla_put_failure;
3768
3769         rcu_read_lock();
3770         if (got_ldev) {
3771                 struct disk_conf *disk_conf;
3772
3773                 disk_conf = rcu_dereference(device->ldev->disk_conf);
3774                 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
3775         }
3776         if (!err) {
3777                 struct net_conf *nc;
3778
3779                 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
3780                 if (nc)
3781                         err = net_conf_to_skb(skb, nc, exclude_sensitive);
3782         }
3783         rcu_read_unlock();
3784         if (err)
3785                 goto nla_put_failure;
3786
3787         nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
3788         if (!nla)
3789                 goto nla_put_failure;
3790         if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
3791             nla_put_u32(skb, T_current_state, device->state.i) ||
3792             nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
3793             nla_put_u64_0pad(skb, T_capacity,
3794                              drbd_get_capacity(device->this_bdev)) ||
3795             nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
3796             nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
3797             nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
3798             nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
3799             nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
3800             nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
3801             nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
3802             nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
3803             nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
3804                 goto nla_put_failure;
3805
3806         if (got_ldev) {
3807                 int err;
3808
3809                 spin_lock_irq(&device->ldev->md.uuid_lock);
3810                 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
3811                 spin_unlock_irq(&device->ldev->md.uuid_lock);
3812
3813                 if (err)
3814                         goto nla_put_failure;
3815
3816                 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
3817                     nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
3818                     nla_put_u64_0pad(skb, T_bits_oos,
3819                                      drbd_bm_total_weight(device)))
3820                         goto nla_put_failure;
3821                 if (C_SYNC_SOURCE <= device->state.conn &&
3822                     C_PAUSED_SYNC_T >= device->state.conn) {
3823                         if (nla_put_u64_0pad(skb, T_bits_rs_total,
3824                                              device->rs_total) ||
3825                             nla_put_u64_0pad(skb, T_bits_rs_failed,
3826                                              device->rs_failed))
3827                                 goto nla_put_failure;
3828                 }
3829         }
3830
3831         if (sib) {
3832                 switch (sib->sib_reason) {
3833                 case SIB_SYNC_PROGRESS:
3834                 case SIB_GET_STATUS_REPLY:
3835                         break;
3836                 case SIB_STATE_CHANGE:
3837                         if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3838                             nla_put_u32(skb, T_new_state, sib->ns.i))
3839                                 goto nla_put_failure;
3840                         break;
3841                 case SIB_HELPER_POST:
3842                         if (nla_put_u32(skb, T_helper_exit_code,
3843                                         sib->helper_exit_code))
3844                                 goto nla_put_failure;
3845                         /* fall through */
3846                 case SIB_HELPER_PRE:
3847                         if (nla_put_string(skb, T_helper, sib->helper_name))
3848                                 goto nla_put_failure;
3849                         break;
3850                 }
3851         }
3852         nla_nest_end(skb, nla);
3853
3854         if (0)
3855 nla_put_failure:
3856                 err = -EMSGSIZE;
3857         if (got_ldev)
3858                 put_ldev(device);
3859         return err;
3860 }
3861
3862 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3863 {
3864         struct drbd_config_context adm_ctx;
3865         enum drbd_ret_code retcode;
3866         int err;
3867
3868         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3869         if (!adm_ctx.reply_skb)
3870                 return retcode;
3871         if (retcode != NO_ERROR)
3872                 goto out;
3873
3874         err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3875         if (err) {
3876                 nlmsg_free(adm_ctx.reply_skb);
3877                 return err;
3878         }
3879 out:
3880         drbd_adm_finish(&adm_ctx, info, retcode);
3881         return 0;
3882 }
3883
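     /* Dump helper for DRBD_ADM_GET_STATUS: emit the status of one volume
      * per invocation, advancing the resource/volume iterators that are
      * kept in cb->args[]. */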
3884 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3885 {
3886         struct drbd_device *device;
3887         struct drbd_genlmsghdr *dh;
3888         struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3889         struct drbd_resource *resource = NULL;
3890         struct drbd_resource *tmp;
3891         unsigned volume = cb->args[1];
3892
3893         /* Open coded, deferred, iteration:
3894          * for_each_resource_safe(resource, tmp, &drbd_resources) {
3895          *      connection = "first connection of resource or undefined";
3896          *      idr_for_each_entry(&resource->devices, device, i) {
3897          *        ...
3898          *      }
3899          * }
3900          * where resource is cb->args[0];
3901          * and i is cb->args[1];
3902          *
3903          * cb->args[2] indicates if we shall loop over all resources,
3904          * or just dump all volumes of a single resource.
3905          *
3906          * This may miss entries inserted after this dump started,
3907          * or entries deleted before they are reached.
3908          *
3909          * We need to make sure the device won't disappear while
3910          * we are looking at it, and revalidate our iterators
3911          * on each iteration.
3912          */
3913
3914         /* synchronize with conn_create()/drbd_destroy_connection() */
3915         rcu_read_lock();
3916         /* revalidate iterator position */
3917         for_each_resource_rcu(tmp, &drbd_resources) {
3918                 if (pos == NULL) {
3919                         /* first iteration */
3920                         pos = tmp;
3921                         resource = pos;
3922                         break;
3923                 }
3924                 if (tmp == pos) {
3925                         resource = pos;
3926                         break;
3927                 }
3928         }
3929         if (resource) {
3930 next_resource:
3931                 device = idr_get_next(&resource->devices, &volume);
3932                 if (!device) {
3933                         /* No more volumes to dump on this resource.
3934                          * Advance resource iterator. */
3935                         pos = list_entry_rcu(resource->resources.next,
3936                                              struct drbd_resource, resources);
3937                         /* Did we dump any volume of this resource yet? */
3938                         if (volume != 0) {
3939                                 /* If we reached the end of the list,
3940                                  * or only a single resource dump was requested,
3941                                  * we are done. */
3942                                 if (&pos->resources == &drbd_resources || cb->args[2])
3943                                         goto out;
3944                                 volume = 0;
3945                                 resource = pos;
3946                                 goto next_resource;
3947                         }
3948                 }
3949
3950                 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3951                                 cb->nlh->nlmsg_seq, &drbd_genl_family,
3952                                 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3953                 if (!dh)
3954                         goto out;
3955
3956                 if (!device) {
3957                         /* This is a connection without a single volume.
3958                          * Surprisingly enough, it may have a network
3959                          * configuration. */
3960                         struct drbd_connection *connection;
3961
3962                         dh->minor = -1U;
3963                         dh->ret_code = NO_ERROR;
3964                         connection = the_only_connection(resource);
3965                         if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3966                                 goto cancel;
3967                         if (connection) {
3968                                 struct net_conf *nc;
3969
3970                                 nc = rcu_dereference(connection->net_conf);
3971                                 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3972                                         goto cancel;
3973                         }
3974                         goto done;
3975                 }
3976
3977                 D_ASSERT(device, device->vnr == volume);
3978                 D_ASSERT(device, device->resource == resource);
3979
3980                 dh->minor = device_to_minor(device);
3981                 dh->ret_code = NO_ERROR;
3982
3983                 if (nla_put_status_info(skb, device, NULL)) {
3984 cancel:
3985                         genlmsg_cancel(skb, dh);
3986                         goto out;
3987                 }
3988 done:
3989                 genlmsg_end(skb, dh);
3990         }
3991
3992 out:
3993         rcu_read_unlock();
3994         /* where to start the next iteration */
3995         cb->args[0] = (long)pos;
3996         cb->args[1] = (pos == resource) ? volume + 1 : 0;
3997
3998         /* If no more resources/volumes/minors are found, the skb stays empty,
3999          * which terminates the dump. */
4000         return skb->len;
4001 }
4002
4003 /*
4004  * Request status of all resources, or of all volumes within a single resource.
4005  *
4006  * This is a dump, as the answer may not otherwise fit in a single reply skb.
4007  * That means we cannot use family->attrbuf or other such members, because the
4008  * dump is NOT protected by the genl_lock().  During the dump, we only have access
4009  * to the incoming skb, and need to open-code "parsing" of the nlattr payload.
4010  *
4011  * Once things are set up properly, we call into get_one_status().
4012  */
4013 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
4014 {
4015         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
4016         struct nlattr *nla;
4017         const char *resource_name;
4018         struct drbd_resource *resource;
4019         int maxtype;
4020
4021         /* Is this a follow-up call? */
4022         if (cb->args[0]) {
4023                 /* ... of a single resource dump,
4024                  * and the resource iterator has been advanced already? */
4025                 if (cb->args[2] && cb->args[2] != cb->args[0])
4026                         return 0; /* DONE. */
4027                 goto dump;
4028         }
4029
4030         /* First call (from netlink_dump_start).  We need to figure out
4031          * which resource(s) the user wants us to dump. */
4032         nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
4033                         nlmsg_attrlen(cb->nlh, hdrlen),
4034                         DRBD_NLA_CFG_CONTEXT);
4035
4036         /* No explicit context given.  Dump all. */
4037         if (!nla)
4038                 goto dump;
4039         maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
4040         nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
4041         if (IS_ERR(nla))
4042                 return PTR_ERR(nla);
4043         /* context given, but no name present? */
4044         if (!nla)
4045                 return -EINVAL;
4046         resource_name = nla_data(nla);
4047         if (!*resource_name)
4048                 return -ENODEV;
4049         resource = drbd_find_resource(resource_name);
4050         if (!resource)
4051                 return -ENODEV;
4052
4053         kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
4054
4055         /* prime iterators, and set "filter" mode mark:
4056          * only dump this resource. */
4057         cb->args[0] = (long)resource;
4058         /* cb->args[1] = 0; passed in this way. */
4059         cb->args[2] = (long)resource;
4060
4061 dump:
4062         return get_one_status(skb, cb);
4063 }
4064
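/* Report which timeout class applies to this minor: peer outdated, degraded
 * (USE_DEGR_WFC_T set), or the default. */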
4065 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
4066 {
4067         struct drbd_config_context adm_ctx;
4068         enum drbd_ret_code retcode;
4069         struct timeout_parms tp;
4070         int err;
4071
4072         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4073         if (!adm_ctx.reply_skb)
4074                 return retcode;
4075         if (retcode != NO_ERROR)
4076                 goto out;
4077
4078         tp.timeout_type =
4079                 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
4080                 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
4081                 UT_DEFAULT;
4082
4083         err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
4084         if (err) {
4085                 nlmsg_free(adm_ctx.reply_skb);
4086                 return err;
4087         }
4088 out:
4089         drbd_adm_finish(&adm_ctx, info, retcode);
4090         return 0;
4091 }
4092
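/* Start online verify (C_VERIFY_S), resuming from the last known
 * ov_start_sector unless explicit start/stop parameters were passed in. */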
4093 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
4094 {
4095         struct drbd_config_context adm_ctx;
4096         struct drbd_device *device;
4097         enum drbd_ret_code retcode;
4098         struct start_ov_parms parms;
4099
4100         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4101         if (!adm_ctx.reply_skb)
4102                 return retcode;
4103         if (retcode != NO_ERROR)
4104                 goto out;
4105
4106         device = adm_ctx.device;
4107
4108         /* resume from last known position, if possible */
4109         parms.ov_start_sector = device->ov_start_sector;
4110         parms.ov_stop_sector = ULLONG_MAX;
4111         if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
4112                 int err = start_ov_parms_from_attrs(&parms, info);
4113                 if (err) {
4114                         retcode = ERR_MANDATORY_TAG;
4115                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
4116                         goto out;
4117                 }
4118         }
4119         mutex_lock(&adm_ctx.resource->adm_mutex);
4120
4121         /* w_make_ov_request expects position to be aligned */
4122         device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
4123         device->ov_stop_sector = parms.ov_stop_sector;
4124
4125         /* If there is still bitmap IO pending, e.g. a previous resync or verify
4126          * that is just finishing, wait for it before requesting a new resync. */
4127         drbd_suspend_io(device);
4128         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
4129         retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
4130         drbd_resume_io(device);
4131
4132         mutex_unlock(&adm_ctx.resource->adm_mutex);
4133 out:
4134         drbd_adm_finish(&adm_ctx, info, retcode);
4135         return 0;
4136 }
4137
4138
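/* Generate a new current UUID.  With clear_bm set on a just-created,
 * connected device (protocol >= 90), the bitmap is cleared as well and both
 * disks go straight to D_UP_TO_DATE, i.e. the initial sync is skipped. */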
4139 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
4140 {
4141         struct drbd_config_context adm_ctx;
4142         struct drbd_device *device;
4143         enum drbd_ret_code retcode;
4144         int skip_initial_sync = 0;
4145         int err;
4146         struct new_c_uuid_parms args;
4147
4148         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4149         if (!adm_ctx.reply_skb)
4150                 return retcode;
4151         if (retcode != NO_ERROR)
4152                 goto out_nolock;
4153
4154         device = adm_ctx.device;
4155         memset(&args, 0, sizeof(args));
4156         if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
4157                 err = new_c_uuid_parms_from_attrs(&args, info);
4158                 if (err) {
4159                         retcode = ERR_MANDATORY_TAG;
4160                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
4161                         goto out_nolock;
4162                 }
4163         }
4164
4165         mutex_lock(&adm_ctx.resource->adm_mutex);
4166         mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
4167
4168         if (!get_ldev(device)) {
4169                 retcode = ERR_NO_DISK;
4170                 goto out;
4171         }
4172
4173         /* this is "skip initial sync"; the disks are assumed to be clean */
4174         if (device->state.conn == C_CONNECTED &&
4175             first_peer_device(device)->connection->agreed_pro_version >= 90 &&
4176             device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
4177                 drbd_info(device, "Preparing to skip initial sync\n");
4178                 skip_initial_sync = 1;
4179         } else if (device->state.conn != C_STANDALONE) {
4180                 retcode = ERR_CONNECTED;
4181                 goto out_dec;
4182         }
4183
4184         drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
4185         drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
4186
4187         if (args.clear_bm) {
4188                 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
4189                         "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
4190                 if (err) {
4191                         drbd_err(device, "Writing bitmap failed with %d\n", err);
4192                         retcode = ERR_IO_MD_DISK;
4193                 }
4194                 if (skip_initial_sync) {
4195                         drbd_send_uuids_skip_initial_sync(first_peer_device(device));
4196                         _drbd_uuid_set(device, UI_BITMAP, 0);
4197                         drbd_print_uuids(device, "cleared bitmap UUID");
4198                         spin_lock_irq(&device->resource->req_lock);
4199                         _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4200                                         CS_VERBOSE, NULL);
4201                         spin_unlock_irq(&device->resource->req_lock);
4202                 }
4203         }
4204
4205         drbd_md_sync(device);
4206 out_dec:
4207         put_ldev(device);
4208 out:
4209         mutex_unlock(device->state_mutex);
4210         mutex_unlock(&adm_ctx.resource->adm_mutex);
4211 out_nolock:
4212         drbd_adm_finish(&adm_ctx, info, retcode);
4213         return 0;
4214 }
4215
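/* Reject empty resource names and names containing '/', so they remain
 * usable as sysfs/configfs/debugfs directory names some day. */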
4216 static enum drbd_ret_code
4217 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
4218 {
4219         const char *name = adm_ctx->resource_name;
4220         if (!name || !name[0]) {
4221                 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
4222                 return ERR_MANDATORY_TAG;
4223         }
4224         /* if we want to use these in sysfs/configfs/debugfs some day,
4225          * we must not allow slashes */
4226         if (strchr(name, '/')) {
4227                 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
4228                 return ERR_INVALID_REQUEST;
4229         }
4230         return NO_ERROR;
4231 }
4232
4233 static void resource_to_info(struct resource_info *info,
4234                              struct drbd_resource *resource)
4235 {
4236         info->res_role = conn_highest_role(first_connection(resource));
4237         info->res_susp = resource->susp;
4238         info->res_susp_nod = resource->susp_nod;
4239         info->res_susp_fen = resource->susp_fen;
4240 }
4241
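/* Create a new resource (and its implicit connection) with the given
 * resource options, then broadcast a NOTIFY_CREATE resource event. */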
4242 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
4243 {
4244         struct drbd_connection *connection;
4245         struct drbd_config_context adm_ctx;
4246         enum drbd_ret_code retcode;
4247         struct res_opts res_opts;
4248         int err;
4249
4250         retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
4251         if (!adm_ctx.reply_skb)
4252                 return retcode;
4253         if (retcode != NO_ERROR)
4254                 goto out;
4255
4256         set_res_opts_defaults(&res_opts);
4257         err = res_opts_from_attrs(&res_opts, info);
4258         if (err && err != -ENOMSG) {
4259                 retcode = ERR_MANDATORY_TAG;
4260                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
4261                 goto out;
4262         }
4263
4264         retcode = drbd_check_resource_name(&adm_ctx);
4265         if (retcode != NO_ERROR)
4266                 goto out;
4267
4268         if (adm_ctx.resource) {
4269                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
4270                         retcode = ERR_INVALID_REQUEST;
4271                         drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
4272                 }
4273                 /* else: still NO_ERROR */
4274                 goto out;
4275         }
4276
4277         /* not yet safe for genl_family.parallel_ops */
4278         mutex_lock(&resources_mutex);
4279         connection = conn_create(adm_ctx.resource_name, &res_opts);
4280         mutex_unlock(&resources_mutex);
4281
4282         if (connection) {
4283                 struct resource_info resource_info;
4284
4285                 mutex_lock(&notification_mutex);
4286                 resource_to_info(&resource_info, connection->resource);
4287                 notify_resource_state(NULL, 0, connection->resource,
4288                                       &resource_info, NOTIFY_CREATE);
4289                 mutex_unlock(&notification_mutex);
4290         } else
4291                 retcode = ERR_NOMEM;
4292
4293 out:
4294         drbd_adm_finish(&adm_ctx, info, retcode);
4295         return 0;
4296 }
4297
4298 static void device_to_info(struct device_info *info,
4299                            struct drbd_device *device)
4300 {
4301         info->dev_disk_state = device->state.disk;
4302 }
4303
4304
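/* Create a new device (minor/volume) within an existing resource and send
 * NOTIFY_CREATE events for the device and its configured peer devices. */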
4305 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
4306 {
4307         struct drbd_config_context adm_ctx;
4308         struct drbd_genlmsghdr *dh = info->userhdr;
4309         enum drbd_ret_code retcode;
4310
4311         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4312         if (!adm_ctx.reply_skb)
4313                 return retcode;
4314         if (retcode != NO_ERROR)
4315                 goto out;
4316
4317         if (dh->minor > MINORMASK) {
4318                 drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
4319                 retcode = ERR_INVALID_REQUEST;
4320                 goto out;
4321         }
4322         if (adm_ctx.volume > DRBD_VOLUME_MAX) {
4323                 drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
4324                 retcode = ERR_INVALID_REQUEST;
4325                 goto out;
4326         }
4327
4328         /* drbd_adm_prepare made sure already
4329          * that first_peer_device(device)->connection and device->vnr match the request. */
4330         if (adm_ctx.device) {
4331                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
4332                         retcode = ERR_MINOR_OR_VOLUME_EXISTS;
4333                 /* else: still NO_ERROR */
4334                 goto out;
4335         }
4336
4337         mutex_lock(&adm_ctx.resource->adm_mutex);
4338         retcode = drbd_create_device(&adm_ctx, dh->minor);
4339         if (retcode == NO_ERROR) {
4340                 struct drbd_device *device;
4341                 struct drbd_peer_device *peer_device;
4342                 struct device_info info;
4343                 unsigned int peer_devices = 0;
4344                 enum drbd_notification_type flags;
4345
4346                 device = minor_to_device(dh->minor);
4347                 for_each_peer_device(peer_device, device) {
4348                         if (!has_net_conf(peer_device->connection))
4349                                 continue;
4350                         peer_devices++;
4351                 }
4352
4353                 device_to_info(&info, device);
4354                 mutex_lock(&notification_mutex);
4355                 flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
4356                 notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
4357                 for_each_peer_device(peer_device, device) {
4358                         struct peer_device_info peer_device_info;
4359
4360                         if (!has_net_conf(peer_device->connection))
4361                                 continue;
4362                         peer_device_to_info(&peer_device_info, peer_device);
4363                         flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
4364                         notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
4365                                                  NOTIFY_CREATE | flags);
4366                 }
4367                 mutex_unlock(&notification_mutex);
4368         }
4369         mutex_unlock(&adm_ctx.resource->adm_mutex);
4370 out:
4371         drbd_adm_finish(&adm_ctx, info, retcode);
4372         return 0;
4373 }
4374
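/* Delete a single minor.  Only allowed while the device is diskless and
 * secondary; the connection itself may stay up. */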
4375 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
4376 {
4377         struct drbd_peer_device *peer_device;
4378
4379         if (device->state.disk == D_DISKLESS &&
4380             /* no need to be device->state.conn == C_STANDALONE &&
4381              * we may want to delete a minor from a live replication group.
4382              */
4383             device->state.role == R_SECONDARY) {
4384                 struct drbd_connection *connection =
4385                         first_connection(device->resource);
4386
4387                 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
4388                                     CS_VERBOSE + CS_WAIT_COMPLETE);
4389
4390                 /* If the state engine hasn't stopped the sender thread yet, we
4391                  * need to flush the sender work queue before generating the
4392                  * DESTROY events here. */
4393                 if (get_t_state(&connection->worker) == RUNNING)
4394                         drbd_flush_workqueue(&connection->sender_work);
4395
4396                 mutex_lock(&notification_mutex);
4397                 for_each_peer_device(peer_device, device) {
4398                         if (!has_net_conf(peer_device->connection))
4399                                 continue;
4400                         notify_peer_device_state(NULL, 0, peer_device, NULL,
4401                                                  NOTIFY_DESTROY | NOTIFY_CONTINUES);
4402                 }
4403                 notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
4404                 mutex_unlock(&notification_mutex);
4405
4406                 drbd_delete_device(device);
4407                 return NO_ERROR;
4408         } else
4409                 return ERR_MINOR_CONFIGURED;
4410 }
4411
4412 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
4413 {
4414         struct drbd_config_context adm_ctx;
4415         enum drbd_ret_code retcode;
4416
4417         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4418         if (!adm_ctx.reply_skb)
4419                 return retcode;
4420         if (retcode != NO_ERROR)
4421                 goto out;
4422
4423         mutex_lock(&adm_ctx.resource->adm_mutex);
4424         retcode = adm_del_minor(adm_ctx.device);
4425         mutex_unlock(&adm_ctx.resource->adm_mutex);
4426 out:
4427         drbd_adm_finish(&adm_ctx, info, retcode);
4428         return 0;
4429 }
4430
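/* Delete a resource once all its connections are standalone and no devices
 * remain: unlink it from drbd_resources, stop the worker threads, free it. */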
4431 static int adm_del_resource(struct drbd_resource *resource)
4432 {
4433         struct drbd_connection *connection;
4434
4435         for_each_connection(connection, resource) {
4436                 if (connection->cstate > C_STANDALONE)
4437                         return ERR_NET_CONFIGURED;
4438         }
4439         if (!idr_is_empty(&resource->devices))
4440                 return ERR_RES_IN_USE;
4441
4442         /* The state engine has stopped the sender thread, so we don't
4443          * need to flush the sender work queue before generating the
4444          * DESTROY event here. */
4445         mutex_lock(&notification_mutex);
4446         notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
4447         mutex_unlock(&notification_mutex);
4448
4449         mutex_lock(&resources_mutex);
4450         list_del_rcu(&resource->resources);
4451         mutex_unlock(&resources_mutex);
4452         /* Make sure all threads have actually stopped: state handling only
4453          * does drbd_thread_stop_nowait(). */
4454         list_for_each_entry(connection, &resource->connections, connections)
4455                 drbd_thread_stop(&connection->worker);
4456         synchronize_rcu();
4457         drbd_free_resource(resource);
4458         return NO_ERROR;
4459 }
4460
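/* Tear down a whole resource: demote every volume to secondary, disconnect,
 * detach, delete the volumes, and finally delete the resource itself. */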
4461 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
4462 {
4463         struct drbd_config_context adm_ctx;
4464         struct drbd_resource *resource;
4465         struct drbd_connection *connection;
4466         struct drbd_device *device;
4467         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
4468         unsigned i;
4469
4470         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4471         if (!adm_ctx.reply_skb)
4472                 return retcode;
4473         if (retcode != NO_ERROR)
4474                 goto finish;
4475
4476         resource = adm_ctx.resource;
4477         mutex_lock(&resource->adm_mutex);
4478         /* demote */
4479         for_each_connection(connection, resource) {
4480                 struct drbd_peer_device *peer_device;
4481
4482                 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
4483                         retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
4484                         if (retcode < SS_SUCCESS) {
4485                                 drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
4486                                 goto out;
4487                         }
4488                 }
4489
4490                 retcode = conn_try_disconnect(connection, 0);
4491                 if (retcode < SS_SUCCESS) {
4492                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
4493                         goto out;
4494                 }
4495         }
4496
4497         /* detach */
4498         idr_for_each_entry(&resource->devices, device, i) {
4499                 retcode = adm_detach(device, 0);
4500                 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
4501                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
4502                         goto out;
4503                 }
4504         }
4505
4506         /* delete volumes */
4507         idr_for_each_entry(&resource->devices, device, i) {
4508                 retcode = adm_del_minor(device);
4509                 if (retcode != NO_ERROR) {
4510                         /* "can not happen" */
4511                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
4512                         goto out;
4513                 }
4514         }
4515
4516         retcode = adm_del_resource(resource);
4517 out:
4518         mutex_unlock(&resource->adm_mutex);
4519 finish:
4520         drbd_adm_finish(&adm_ctx, info, retcode);
4521         return 0;
4522 }
4523
4524 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
4525 {
4526         struct drbd_config_context adm_ctx;
4527         struct drbd_resource *resource;
4528         enum drbd_ret_code retcode;
4529
4530         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4531         if (!adm_ctx.reply_skb)
4532                 return retcode;
4533         if (retcode != NO_ERROR)
4534                 goto finish;
4535         resource = adm_ctx.resource;
4536
4537         mutex_lock(&resource->adm_mutex);
4538         retcode = adm_del_resource(resource);
4539         mutex_unlock(&resource->adm_mutex);
4540 finish:
4541         drbd_adm_finish(&adm_ctx, info, retcode);
4542         return 0;
4543 }
4544
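/* Broadcast a state info broadcast (sib) event for this device to the drbd
 * events multicast group. */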
4545 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
4546 {
4547         struct sk_buff *msg;
4548         struct drbd_genlmsghdr *d_out;
4549         unsigned seq;
4550         int err = -ENOMEM;
4551
4552         seq = atomic_inc_return(&drbd_genl_seq);
4553         msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4554         if (!msg)
4555                 goto failed;
4556
4557         err = -EMSGSIZE;
4558         d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
4559         if (!d_out) /* cannot happen, but anyway. */
4560                 goto nla_put_failure;
4561         d_out->minor = device_to_minor(device);
4562         d_out->ret_code = NO_ERROR;
4563
4564         if (nla_put_status_info(msg, device, sib))
4565                 goto nla_put_failure;
4566         genlmsg_end(msg, d_out);
4567         err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
4568         /* msg has been consumed or freed in netlink_broadcast() */
4569         if (err && err != -ESRCH)
4570                 goto failed;
4571
4572         return;
4573
4574 nla_put_failure:
4575         nlmsg_free(msg);
4576 failed:
4577         drbd_err(device, "Error %d while broadcasting event. "
4578                         "Event seq:%u sib_reason:%u\n",
4579                         err, seq, sib->sib_reason);
4580 }
4581
4582 static int nla_put_notification_header(struct sk_buff *msg,
4583                                        enum drbd_notification_type type)
4584 {
4585         struct drbd_notification_header nh = {
4586                 .nh_type = type,
4587         };
4588
4589         return drbd_notification_header_to_skb(msg, &nh, true);
4590 }
4591
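/* Broadcast a resource state notification (or append it to the skb of an
 * ongoing dump), including resource statistics.  The notify_*_state()
 * helpers below follow the same pattern for devices, connections and peer
 * devices. */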
4592 void notify_resource_state(struct sk_buff *skb,
4593                            unsigned int seq,
4594                            struct drbd_resource *resource,
4595                            struct resource_info *resource_info,
4596                            enum drbd_notification_type type)
4597 {
4598         struct resource_statistics resource_statistics;
4599         struct drbd_genlmsghdr *dh;
4600         bool multicast = false;
4601         int err;
4602
4603         if (!skb) {
4604                 seq = atomic_inc_return(&notify_genl_seq);
4605                 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4606                 err = -ENOMEM;
4607                 if (!skb)
4608                         goto failed;
4609                 multicast = true;
4610         }
4611
4612         err = -EMSGSIZE;
4613         dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
4614         if (!dh)
4615                 goto nla_put_failure;
4616         dh->minor = -1U;
4617         dh->ret_code = NO_ERROR;
4618         if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
4619             nla_put_notification_header(skb, type) ||
4620             ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4621              resource_info_to_skb(skb, resource_info, true)))
4622                 goto nla_put_failure;
4623         resource_statistics.res_stat_write_ordering = resource->write_ordering;
4624         err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
4625         if (err)
4626                 goto nla_put_failure;
4627         genlmsg_end(skb, dh);
4628         if (multicast) {
4629                 err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4630                 /* skb has been consumed or freed in netlink_broadcast() */
4631                 if (err && err != -ESRCH)
4632                         goto failed;
4633         }
4634         return;
4635
4636 nla_put_failure:
4637         nlmsg_free(skb);
4638 failed:
4639         drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
4640                         err, seq);
4641 }
4642
4643 void notify_device_state(struct sk_buff *skb,
4644                          unsigned int seq,
4645                          struct drbd_device *device,
4646                          struct device_info *device_info,
4647                          enum drbd_notification_type type)
4648 {
4649         struct device_statistics device_statistics;
4650         struct drbd_genlmsghdr *dh;
4651         bool multicast = false;
4652         int err;
4653
4654         if (!skb) {
4655                 seq = atomic_inc_return(&notify_genl_seq);
4656                 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4657                 err = -ENOMEM;
4658                 if (!skb)
4659                         goto failed;
4660                 multicast = true;
4661         }
4662
4663         err = -EMSGSIZE;
4664         dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
4665         if (!dh)
4666                 goto nla_put_failure;
4667         dh->minor = device->minor;
4668         dh->ret_code = NO_ERROR;
4669         if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
4670             nla_put_notification_header(skb, type) ||
4671             ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4672              device_info_to_skb(skb, device_info, true)))
4673                 goto nla_put_failure;
4674         device_to_statistics(&device_statistics, device);
4675         device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
4676         genlmsg_end(skb, dh);
4677         if (multicast) {
4678                 err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4679                 /* skb has been consumed or freed in netlink_broadcast() */
4680                 if (err && err != -ESRCH)
4681                         goto failed;
4682         }
4683         return;
4684
4685 nla_put_failure:
4686         nlmsg_free(skb);
4687 failed:
4688         drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
4689                  err, seq);
4690 }
4691
4692 void notify_connection_state(struct sk_buff *skb,
4693                              unsigned int seq,
4694                              struct drbd_connection *connection,
4695                              struct connection_info *connection_info,
4696                              enum drbd_notification_type type)
4697 {
4698         struct connection_statistics connection_statistics;
4699         struct drbd_genlmsghdr *dh;
4700         bool multicast = false;
4701         int err;
4702
4703         if (!skb) {
4704                 seq = atomic_inc_return(&notify_genl_seq);
4705                 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4706                 err = -ENOMEM;
4707                 if (!skb)
4708                         goto failed;
4709                 multicast = true;
4710         }
4711
4712         err = -EMSGSIZE;
4713         dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
4714         if (!dh)
4715                 goto nla_put_failure;
4716         dh->minor = -1U;
4717         dh->ret_code = NO_ERROR;
4718         if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
4719             nla_put_notification_header(skb, type) ||
4720             ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4721              connection_info_to_skb(skb, connection_info, true)))
4722                 goto nla_put_failure;
4723         connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
4724         connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
4725         genlmsg_end(skb, dh);
4726         if (multicast) {
4727                 err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4728                 /* skb has been consumed or freed in netlink_broadcast() */
4729                 if (err && err != -ESRCH)
4730                         goto failed;
4731         }
4732         return;
4733
4734 nla_put_failure:
4735         nlmsg_free(skb);
4736 failed:
4737         drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
4738                  err, seq);
4739 }
4740
4741 void notify_peer_device_state(struct sk_buff *skb,
4742                               unsigned int seq,
4743                               struct drbd_peer_device *peer_device,
4744                               struct peer_device_info *peer_device_info,
4745                               enum drbd_notification_type type)
4746 {
4747         struct peer_device_statistics peer_device_statistics;
4748         struct drbd_resource *resource = peer_device->device->resource;
4749         struct drbd_genlmsghdr *dh;
4750         bool multicast = false;
4751         int err;
4752
4753         if (!skb) {
4754                 seq = atomic_inc_return(&notify_genl_seq);
4755                 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4756                 err = -ENOMEM;
4757                 if (!skb)
4758                         goto failed;
4759                 multicast = true;
4760         }
4761
4762         err = -EMSGSIZE;
4763         dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
4764         if (!dh)
4765                 goto nla_put_failure;
4766         dh->minor = -1U;
4767         dh->ret_code = NO_ERROR;
4768         if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
4769             nla_put_notification_header(skb, type) ||
4770             ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4771              peer_device_info_to_skb(skb, peer_device_info, true)))
4772                 goto nla_put_failure;
4773         peer_device_to_statistics(&peer_device_statistics, peer_device);
4774         peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
4775         genlmsg_end(skb, dh);
4776         if (multicast) {
4777                 err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4778                 /* skb has been consumed or freed in netlink_broadcast() */
4779                 if (err && err != -ESRCH)
4780                         goto failed;
4781         }
4782         return;
4783
4784 nla_put_failure:
4785         nlmsg_free(skb);
4786 failed:
4787         drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
4788                  err, seq);
4789 }
4790
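/* Broadcast a DRBD_HELPER event carrying the name and exit status of a
 * user-space helper invocation. */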
4791 void notify_helper(enum drbd_notification_type type,
4792                    struct drbd_device *device, struct drbd_connection *connection,
4793                    const char *name, int status)
4794 {
4795         struct drbd_resource *resource = device ? device->resource : connection->resource;
4796         struct drbd_helper_info helper_info;
4797         unsigned int seq = atomic_inc_return(&notify_genl_seq);
4798         struct sk_buff *skb = NULL;
4799         struct drbd_genlmsghdr *dh;
4800         int err;
4801
4802         strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
4803         helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
4804         helper_info.helper_status = status;
4805
4806         skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4807         err = -ENOMEM;
4808         if (!skb)
4809                 goto fail;
4810
4811         err = -EMSGSIZE;
4812         dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
4813         if (!dh)
4814                 goto fail;
4815         dh->minor = device ? device->minor : -1;
4816         dh->ret_code = NO_ERROR;
4817         mutex_lock(&notification_mutex);
4818         if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
4819             nla_put_notification_header(skb, type) ||
4820             drbd_helper_info_to_skb(skb, &helper_info, true))
4821                 goto unlock_fail;
4822         genlmsg_end(skb, dh);
4823         err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4824         skb = NULL;
4825         /* skb has been consumed or freed in netlink_broadcast() */
4826         if (err && err != -ESRCH)
4827                 goto unlock_fail;
4828         mutex_unlock(&notification_mutex);
4829         return;
4830
4831 unlock_fail:
4832         mutex_unlock(&notification_mutex);
4833 fail:
4834         nlmsg_free(skb);
4835         drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
4836                  err, seq);
4837 }
4838
4839 static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
4840 {
4841         struct drbd_genlmsghdr *dh;
4842         int err;
4843
4844         err = -EMSGSIZE;
4845         dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
4846         if (!dh)
4847                 goto nla_put_failure;
4848         dh->minor = -1U;
4849         dh->ret_code = NO_ERROR;
4850         if (nla_put_notification_header(skb, NOTIFY_EXISTS))
4851                 goto nla_put_failure;
4852         genlmsg_end(skb, dh);
4853         return;
4854
4855 nla_put_failure:
4856         nlmsg_free(skb);
4857         pr_err("Error %d sending event. Event seq:%u\n", err, seq);
4858 }
4859
4860 static void free_state_changes(struct list_head *list)
4861 {
4862         while (!list_empty(list)) {
4863                 struct drbd_state_change *state_change =
4864                         list_first_entry(list, struct drbd_state_change, list);
4865                 list_del(&state_change->list);
4866                 forget_state_change(state_change);
4867         }
4868 }
4869
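/* One notification for the resource, plus one per connection, per device,
 * and per (device, connection) pair. */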
4870 static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
4871 {
4872         return 1 +
4873                state_change->n_connections +
4874                state_change->n_devices +
4875                state_change->n_devices * state_change->n_connections;
4876 }
4877
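/* Dump callback for DRBD_ADM_GET_INITIAL_STATE: replay the remembered state
 * changes one notification at a time (flagged NOTIFY_EXISTS), finishing with
 * an "initial state done" message. */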
4878 static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
4879 {
4880         struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
4881         unsigned int seq = cb->args[2];
4882         unsigned int n;
4883         enum drbd_notification_type flags = 0;
4884
4885         /* There is no need to take notification_mutex here: it doesn't
4886            matter if the initial state events mix with later state change
4887            events; we can always tell the events apart by the NOTIFY_EXISTS
4888            flag. */
4889
4890         cb->args[5]--;
4891         if (cb->args[5] == 1) {
4892                 notify_initial_state_done(skb, seq);
4893                 goto out;
4894         }
4895         n = cb->args[4]++;
4896         if (cb->args[4] < cb->args[3])
4897                 flags |= NOTIFY_CONTINUES;
4898         if (n < 1) {
4899                 notify_resource_state_change(skb, seq, state_change->resource,
4900                                              NOTIFY_EXISTS | flags);
4901                 goto next;
4902         }
4903         n--;
4904         if (n < state_change->n_connections) {
4905                 notify_connection_state_change(skb, seq, &state_change->connections[n],
4906                                                NOTIFY_EXISTS | flags);
4907                 goto next;
4908         }
4909         n -= state_change->n_connections;
4910         if (n < state_change->n_devices) {
4911                 notify_device_state_change(skb, seq, &state_change->devices[n],
4912                                            NOTIFY_EXISTS | flags);
4913                 goto next;
4914         }
4915         n -= state_change->n_devices;
4916         if (n < state_change->n_devices * state_change->n_connections) {
4917                 notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
4918                                                 NOTIFY_EXISTS | flags);
4919                 goto next;
4920         }
4921
4922 next:
4923         if (cb->args[4] == cb->args[3]) {
4924                 struct drbd_state_change *next_state_change =
4925                         list_entry(state_change->list.next,
4926                                    struct drbd_state_change, list);
4927                 cb->args[0] = (long)next_state_change;
4928                 cb->args[3] = notifications_for_state_change(next_state_change);
4929                 cb->args[4] = 0;
4930         }
4931 out:
4932         return skb->len;
4933 }
4934
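/* Entry point for the initial state dump: on the first call, snapshot the
 * state of all resources under resources_mutex; subsequent calls replay
 * that snapshot via get_initial_state(). */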
4935 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
4936 {
4937         struct drbd_resource *resource;
4938         LIST_HEAD(head);
4939
4940         if (cb->args[5] >= 1) {
4941                 if (cb->args[5] > 1)
4942                         return get_initial_state(skb, cb);
4943                 if (cb->args[0]) {
4944                         struct drbd_state_change *state_change =
4945                                 (struct drbd_state_change *)cb->args[0];
4946
4947                         /* connect list to head */
4948                         list_add(&head, &state_change->list);
4949                         free_state_changes(&head);
4950                 }
4951                 return 0;
4952         }
4953
4954         cb->args[5] = 2;  /* number of iterations */
4955         mutex_lock(&resources_mutex);
4956         for_each_resource(resource, &drbd_resources) {
4957                 struct drbd_state_change *state_change;
4958
4959                 state_change = remember_old_state(resource, GFP_KERNEL);
4960                 if (!state_change) {
4961                         if (!list_empty(&head))
4962                                 free_state_changes(&head);
4963                         mutex_unlock(&resources_mutex);
4964                         return -ENOMEM;
4965                 }
4966                 copy_old_to_new_state_change(state_change);
4967                 list_add_tail(&state_change->list, &head);
4968                 cb->args[5] += notifications_for_state_change(state_change);
4969         }
4970         mutex_unlock(&resources_mutex);
4971
4972         if (!list_empty(&head)) {
4973                 struct drbd_state_change *state_change =
4974                         list_entry(head.next, struct drbd_state_change, list);
4975                 cb->args[0] = (long)state_change;
4976                 cb->args[3] = notifications_for_state_change(state_change);
4977                 list_del(&head);  /* detach list from head */
4978         }
4979
4980         cb->args[2] = cb->nlh->nlmsg_seq;
4981         return get_initial_state(skb, cb);
4982 }