+ dout("%s rbd_dev %p wake_all %d\n", __func__, rbd_dev, wake_all);
+
+ cancel_delayed_work(&rbd_dev->lock_dwork);
+ if (wake_all)
+ wake_up_all(&rbd_dev->lock_waitq);
+ else
+ wake_up(&rbd_dev->lock_waitq);
+}
+
+static int get_lock_owner_info(struct rbd_device *rbd_dev,
+ struct ceph_locker **lockers, u32 *num_lockers)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ u8 lock_type;
+ char *lock_tag;
+ int ret;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, RBD_LOCK_NAME,
+ &lock_type, &lock_tag, lockers, num_lockers);
+ if (ret)
+ return ret;
+
+ if (*num_lockers == 0) {
+ dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
+ goto out;
+ }
+
+ if (strcmp(lock_tag, RBD_LOCK_TAG)) {
+ rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
+ lock_tag);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (lock_type == CEPH_CLS_LOCK_SHARED) {
+ rbd_warn(rbd_dev, "shared lock type detected");
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
+ strlen(RBD_LOCK_COOKIE_PREFIX))) {
+ rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
+ (*lockers)[0].id.cookie);
+ ret = -EBUSY;
+ goto out;
+ }
+
+out:
+ kfree(lock_tag);
+ return ret;
+}
+
+static int find_watcher(struct rbd_device *rbd_dev,
+ const struct ceph_locker *locker)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_watch_item *watchers;
+ u32 num_watchers;
+ u64 cookie;
+ int i;
+ int ret;
+
+ ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, &watchers,
+ &num_watchers);
+ if (ret)
+ return ret;
+
+ sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
+ for (i = 0; i < num_watchers; i++) {
+ if (!memcmp(&watchers[i].addr, &locker->info.addr,
+ sizeof(locker->info.addr)) &&
+ watchers[i].cookie == cookie) {
+ struct rbd_client_id cid = {
+ .gid = le64_to_cpu(watchers[i].name.num),
+ .handle = cookie,
+ };
+
+ dout("%s rbd_dev %p found cid %llu-%llu\n", __func__,
+ rbd_dev, cid.gid, cid.handle);
+ rbd_set_owner_cid(rbd_dev, &cid);
+ ret = 1;
+ goto out;
+ }
+ }
+
+ dout("%s rbd_dev %p no watchers\n", __func__, rbd_dev);
+ ret = 0;
+out:
+ kfree(watchers);
+ return ret;
+}
+
+/*
+ * lock_rwsem must be held for write
+ */
+static int rbd_try_lock(struct rbd_device *rbd_dev)
+{
+ struct ceph_client *client = rbd_dev->rbd_client->client;
+ struct ceph_locker *lockers;
+ u32 num_lockers;
+ int ret;
+
+ for (;;) {
+ ret = rbd_lock(rbd_dev);
+ if (ret != -EBUSY)
+ return ret;
+
+ /* determine if the current lock holder is still alive */
+ ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
+ if (ret)
+ return ret;
+
+ if (num_lockers == 0)
+ goto again;
+
+ ret = find_watcher(rbd_dev, lockers);
+ if (ret) {
+ if (ret > 0)
+ ret = 0; /* have to request lock */
+ goto out;
+ }
+
+ rbd_warn(rbd_dev, "%s%llu seems dead, breaking lock",
+ ENTITY_NAME(lockers[0].id.name));
+
+ ret = ceph_monc_blacklist_add(&client->monc,
+ &lockers[0].info.addr);
+ if (ret) {
+ rbd_warn(rbd_dev, "blacklist of %s%llu failed: %d",
+ ENTITY_NAME(lockers[0].id.name), ret);
+ goto out;
+ }
+
+ ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, RBD_LOCK_NAME,
+ lockers[0].id.cookie,
+ &lockers[0].id.name);
+ if (ret && ret != -ENOENT)
+ goto out;
+
+again:
+ ceph_free_lockers(lockers, num_lockers);
+ }
+
+out:
+ ceph_free_lockers(lockers, num_lockers);
+ return ret;
+}
+
+/*
+ * ret is set only if lock_state is RBD_LOCK_STATE_UNLOCKED
+ */
+static enum rbd_lock_state rbd_try_acquire_lock(struct rbd_device *rbd_dev,
+ int *pret)
+{
+ enum rbd_lock_state lock_state;
+
+ down_read(&rbd_dev->lock_rwsem);
+ dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
+ rbd_dev->lock_state);
+ if (__rbd_is_lock_owner(rbd_dev)) {
+ lock_state = rbd_dev->lock_state;
+ up_read(&rbd_dev->lock_rwsem);
+ return lock_state;
+ }
+
+ up_read(&rbd_dev->lock_rwsem);
+ down_write(&rbd_dev->lock_rwsem);
+ dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
+ rbd_dev->lock_state);
+ if (!__rbd_is_lock_owner(rbd_dev)) {
+ *pret = rbd_try_lock(rbd_dev);
+ if (*pret)
+ rbd_warn(rbd_dev, "failed to acquire lock: %d", *pret);
+ }
+
+ lock_state = rbd_dev->lock_state;
+ up_write(&rbd_dev->lock_rwsem);
+ return lock_state;
+}
+
+static void rbd_acquire_lock(struct work_struct *work)
+{
+ struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
+ struct rbd_device, lock_dwork);
+ enum rbd_lock_state lock_state;
+ int ret;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+again:
+ lock_state = rbd_try_acquire_lock(rbd_dev, &ret);
+ if (lock_state != RBD_LOCK_STATE_UNLOCKED || ret == -EBLACKLISTED) {
+ if (lock_state == RBD_LOCK_STATE_LOCKED)
+ wake_requests(rbd_dev, true);
+ dout("%s rbd_dev %p lock_state %d ret %d - done\n", __func__,
+ rbd_dev, lock_state, ret);
+ return;
+ }
+
+ ret = rbd_request_lock(rbd_dev);
+ if (ret == -ETIMEDOUT) {
+ goto again; /* treat this as a dead client */
+ } else if (ret < 0) {
+ rbd_warn(rbd_dev, "error requesting lock: %d", ret);
+ mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
+ RBD_RETRY_DELAY);
+ } else {
+ /*
+ * lock owner acked, but resend if we don't see them
+ * release the lock
+ */
+ dout("%s rbd_dev %p requeueing lock_dwork\n", __func__,
+ rbd_dev);
+ mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
+ msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC));
+ }
+}
+
+/*
+ * lock_rwsem must be held for write
+ */
+static bool rbd_release_lock(struct rbd_device *rbd_dev)
+{
+ dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
+ rbd_dev->lock_state);
+ if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
+ return false;
+
+ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
+ downgrade_write(&rbd_dev->lock_rwsem);
+ /*
+ * Ensure that all in-flight IO is flushed.
+ *
+ * FIXME: ceph_osdc_sync() flushes the entire OSD client, which
+ * may be shared with other devices.
+ */
+ ceph_osdc_sync(&rbd_dev->rbd_client->client->osdc);
+ up_read(&rbd_dev->lock_rwsem);
+
+ down_write(&rbd_dev->lock_rwsem);
+ dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
+ rbd_dev->lock_state);
+ if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
+ return false;
+
+ if (!rbd_unlock(rbd_dev))
+ /*
+ * Give others a chance to grab the lock - we would re-acquire
+ * almost immediately if we got new IO during ceph_osdc_sync()
+ * otherwise. We need to ack our own notifications, so this
+ * lock_dwork will be requeued from rbd_wait_state_locked()
+ * after wake_requests() in rbd_handle_released_lock().
+ */
+ cancel_delayed_work(&rbd_dev->lock_dwork);
+
+ return true;
+}
+
+static void rbd_release_lock_work(struct work_struct *work)
+{
+ struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
+ unlock_work);
+
+ down_write(&rbd_dev->lock_rwsem);
+ rbd_release_lock(rbd_dev);
+ up_write(&rbd_dev->lock_rwsem);
+}
+
+static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
+ void **p)
+{
+ struct rbd_client_id cid = { 0 };
+
+ if (struct_v >= 2) {
+ cid.gid = ceph_decode_64(p);
+ cid.handle = ceph_decode_64(p);
+ }
+
+ dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
+ cid.handle);
+ if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
+ down_write(&rbd_dev->lock_rwsem);
+ if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
+ /*
+ * we already know that the remote client is
+ * the owner
+ */
+ up_write(&rbd_dev->lock_rwsem);
+ return;
+ }
+
+ rbd_set_owner_cid(rbd_dev, &cid);
+ downgrade_write(&rbd_dev->lock_rwsem);
+ } else {
+ down_read(&rbd_dev->lock_rwsem);
+ }
+
+ if (!__rbd_is_lock_owner(rbd_dev))
+ wake_requests(rbd_dev, false);
+ up_read(&rbd_dev->lock_rwsem);
+}
+
+static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
+ void **p)
+{
+ struct rbd_client_id cid = { 0 };
+
+ if (struct_v >= 2) {
+ cid.gid = ceph_decode_64(p);
+ cid.handle = ceph_decode_64(p);
+ }
+
+ dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
+ cid.handle);
+ if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
+ down_write(&rbd_dev->lock_rwsem);
+ if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
+ dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n",
+ __func__, rbd_dev, cid.gid, cid.handle,
+ rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
+ up_write(&rbd_dev->lock_rwsem);
+ return;
+ }
+
+ rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
+ downgrade_write(&rbd_dev->lock_rwsem);
+ } else {
+ down_read(&rbd_dev->lock_rwsem);
+ }
+
+ if (!__rbd_is_lock_owner(rbd_dev))
+ wake_requests(rbd_dev, false);
+ up_read(&rbd_dev->lock_rwsem);
+}
+
+static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
+ void **p)
+{
+ struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
+ struct rbd_client_id cid = { 0 };
+ bool need_to_send;
+
+ if (struct_v >= 2) {
+ cid.gid = ceph_decode_64(p);
+ cid.handle = ceph_decode_64(p);
+ }
+
+ dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
+ cid.handle);
+ if (rbd_cid_equal(&cid, &my_cid))
+ return false;
+
+ down_read(&rbd_dev->lock_rwsem);
+ need_to_send = __rbd_is_lock_owner(rbd_dev);
+ if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
+ if (!rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) {
+ dout("%s rbd_dev %p queueing unlock_work\n", __func__,
+ rbd_dev);
+ queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
+ }
+ }
+ up_read(&rbd_dev->lock_rwsem);
+ return need_to_send;
+}
+
+static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
+ u64 notify_id, u64 cookie, s32 *result)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int buf_size = 4 + CEPH_ENCODING_START_BLK_LEN;
+ char buf[buf_size];
+ int ret;
+
+ if (result) {
+ void *p = buf;
+
+ /* encode ResponseMessage */
+ ceph_start_encoding(&p, 1, 1,
+ buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_32(&p, *result);
+ } else {
+ buf_size = 0;
+ }
+
+ ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, notify_id, cookie,
+ buf, buf_size);
+ if (ret)
+ rbd_warn(rbd_dev, "acknowledge_notify failed: %d", ret);
+}
+
+static void rbd_acknowledge_notify(struct rbd_device *rbd_dev, u64 notify_id,
+ u64 cookie)
+{
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+ __rbd_acknowledge_notify(rbd_dev, notify_id, cookie, NULL);
+}
+
+static void rbd_acknowledge_notify_result(struct rbd_device *rbd_dev,
+ u64 notify_id, u64 cookie, s32 result)
+{
+ dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
+ __rbd_acknowledge_notify(rbd_dev, notify_id, cookie, &result);
+}
+
+static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
+ u64 notifier_id, void *data, size_t data_len)
+{
+ struct rbd_device *rbd_dev = arg;
+ void *p = data;
+ void *const end = p + data_len;
+ u8 struct_v;
+ u32 len;
+ u32 notify_op;
+ int ret;
+
+ dout("%s rbd_dev %p cookie %llu notify_id %llu data_len %zu\n",
+ __func__, rbd_dev, cookie, notify_id, data_len);
+ if (data_len) {
+ ret = ceph_start_decoding(&p, end, 1, "NotifyMessage",
+ &struct_v, &len);
+ if (ret) {
+ rbd_warn(rbd_dev, "failed to decode NotifyMessage: %d",
+ ret);
+ return;
+ }
+
+ notify_op = ceph_decode_32(&p);
+ } else {
+ /* legacy notification for header updates */
+ notify_op = RBD_NOTIFY_OP_HEADER_UPDATE;
+ len = 0;
+ }
+
+ dout("%s rbd_dev %p notify_op %u\n", __func__, rbd_dev, notify_op);
+ switch (notify_op) {
+ case RBD_NOTIFY_OP_ACQUIRED_LOCK:
+ rbd_handle_acquired_lock(rbd_dev, struct_v, &p);
+ rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
+ break;
+ case RBD_NOTIFY_OP_RELEASED_LOCK:
+ rbd_handle_released_lock(rbd_dev, struct_v, &p);
+ rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
+ break;
+ case RBD_NOTIFY_OP_REQUEST_LOCK:
+ if (rbd_handle_request_lock(rbd_dev, struct_v, &p))
+ /*
+ * send ResponseMessage(0) back so the client
+ * can detect a missing owner
+ */
+ rbd_acknowledge_notify_result(rbd_dev, notify_id,
+ cookie, 0);
+ else
+ rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
+ break;
+ case RBD_NOTIFY_OP_HEADER_UPDATE:
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ rbd_warn(rbd_dev, "refresh failed: %d", ret);
+
+ rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
+ break;
+ default:
+ if (rbd_is_lock_owner(rbd_dev))
+ rbd_acknowledge_notify_result(rbd_dev, notify_id,
+ cookie, -EOPNOTSUPP);
+ else
+ rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
+ break;
+ }
+}
+
+static void __rbd_unregister_watch(struct rbd_device *rbd_dev);
+
+static void rbd_watch_errcb(void *arg, u64 cookie, int err)
+{
+ struct rbd_device *rbd_dev = arg;
+
+ rbd_warn(rbd_dev, "encountered watch error: %d", err);
+
+ down_write(&rbd_dev->lock_rwsem);
+ rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
+ up_write(&rbd_dev->lock_rwsem);
+
+ mutex_lock(&rbd_dev->watch_mutex);
+ if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) {
+ __rbd_unregister_watch(rbd_dev);
+ rbd_dev->watch_state = RBD_WATCH_STATE_ERROR;
+
+ queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0);
+ }
+ mutex_unlock(&rbd_dev->watch_mutex);
+}
+
+/*
+ * watch_mutex must be locked
+ */
+static int __rbd_register_watch(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_osd_linger_request *handle;
+
+ rbd_assert(!rbd_dev->watch_handle);
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, rbd_watch_cb,
+ rbd_watch_errcb, rbd_dev);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ rbd_dev->watch_handle = handle;
+ return 0;
+}
+
+/*
+ * watch_mutex must be locked
+ */
+static void __rbd_unregister_watch(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int ret;
+
+ rbd_assert(rbd_dev->watch_handle);
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle);
+ if (ret)
+ rbd_warn(rbd_dev, "failed to unwatch: %d", ret);
+
+ rbd_dev->watch_handle = NULL;
+}
+
+static int rbd_register_watch(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ mutex_lock(&rbd_dev->watch_mutex);
+ rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED);
+ ret = __rbd_register_watch(rbd_dev);
+ if (ret)
+ goto out;
+
+ rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
+ rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
+
+out:
+ mutex_unlock(&rbd_dev->watch_mutex);
+ return ret;
+}
+
+static void cancel_tasks_sync(struct rbd_device *rbd_dev)
+{
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ cancel_delayed_work_sync(&rbd_dev->watch_dwork);
+ cancel_work_sync(&rbd_dev->acquired_lock_work);
+ cancel_work_sync(&rbd_dev->released_lock_work);
+ cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+ cancel_work_sync(&rbd_dev->unlock_work);
+}
+
+static void rbd_unregister_watch(struct rbd_device *rbd_dev)
+{
+ WARN_ON(waitqueue_active(&rbd_dev->lock_waitq));
+ cancel_tasks_sync(rbd_dev);
+
+ mutex_lock(&rbd_dev->watch_mutex);
+ if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
+ __rbd_unregister_watch(rbd_dev);
+ rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
+ mutex_unlock(&rbd_dev->watch_mutex);
+
+ ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
+}
+
+static void rbd_reregister_watch(struct work_struct *work)
+{
+ struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
+ struct rbd_device, watch_dwork);
+ bool was_lock_owner = false;
+ int ret;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ down_write(&rbd_dev->lock_rwsem);
+ if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
+ was_lock_owner = rbd_release_lock(rbd_dev);
+
+ mutex_lock(&rbd_dev->watch_mutex);
+ if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR)
+ goto fail_unlock;
+
+ ret = __rbd_register_watch(rbd_dev);
+ if (ret) {
+ rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
+ if (ret != -EBLACKLISTED)
+ queue_delayed_work(rbd_dev->task_wq,
+ &rbd_dev->watch_dwork,
+ RBD_RETRY_DELAY);
+ goto fail_unlock;
+ }
+
+ rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
+ rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
+ mutex_unlock(&rbd_dev->watch_mutex);
+
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
+
+ if (was_lock_owner) {
+ ret = rbd_try_lock(rbd_dev);
+ if (ret)
+ rbd_warn(rbd_dev, "reregisteration lock failed: %d",
+ ret);
+ }
+
+ up_write(&rbd_dev->lock_rwsem);
+ wake_requests(rbd_dev, true);
+ return;
+
+fail_unlock:
+ mutex_unlock(&rbd_dev->watch_mutex);
+ up_write(&rbd_dev->lock_rwsem);
+}
+
+/*
+ * Synchronous osd object method call. Returns the number of bytes
+ * returned in the outbound buffer, or a negative error code.
+ */
+static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
+ const char *object_name,
+ const char *class_name,
+ const char *method_name,
+ const void *outbound,
+ size_t outbound_size,
+ void *inbound,
+ size_t inbound_size)
+{
+ struct rbd_obj_request *obj_request;
+ struct page **pages;
+ u32 page_count;
+ int ret;
+
+ /*
+ * Method calls are ultimately read operations. The result
+ * should placed into the inbound buffer provided. They
+ * also supply outbound data--parameters for the object
+ * method. Currently if this is present it will be a
+ * snapshot id.
+ */
+ page_count = (u32)calc_pages_for(0, inbound_size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(object_name, 0, inbound_size,
+ OBJ_REQUEST_PAGES);
+ if (!obj_request)
+ goto out;
+
+ obj_request->pages = pages;
+ obj_request->page_count = page_count;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL,
+ class_name, method_name);
+ if (outbound_size) {
+ struct ceph_pagelist *pagelist;
+
+ pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ if (!pagelist)
+ goto out;
+
+ ceph_pagelist_init(pagelist);
+ ceph_pagelist_append(pagelist, outbound, outbound_size);
+ osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0,
+ pagelist);
+ }
+ osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
+ obj_request->pages, inbound_size,
+ 0, false, false);
+ rbd_osd_req_format_read(obj_request);
+
+ rbd_obj_request_submit(obj_request);
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out;
+
+ ret = obj_request->result;
+ if (ret < 0)
+ goto out;
+
+ rbd_assert(obj_request->xferred < (u64)INT_MAX);
+ ret = (int)obj_request->xferred;
+ ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
+out:
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+ else
+ ceph_release_page_vector(pages, page_count);
+
+ return ret;
+}
+
+/*
+ * lock_rwsem must be held for read
+ */
+static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
+{
+ DEFINE_WAIT(wait);
+
+ do {
+ /*
+ * Note the use of mod_delayed_work() in rbd_acquire_lock()
+ * and cancel_delayed_work() in wake_requests().
+ */
+ dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev);
+ queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
+ prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ up_read(&rbd_dev->lock_rwsem);
+ schedule();
+ down_read(&rbd_dev->lock_rwsem);
+ } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
+ finish_wait(&rbd_dev->lock_waitq, &wait);
+}
+
+static void rbd_queue_workfn(struct work_struct *work)
+{
+ struct request *rq = blk_mq_rq_from_pdu(work);
+ struct rbd_device *rbd_dev = rq->q->queuedata;
+ struct rbd_img_request *img_request;
+ struct ceph_snap_context *snapc = NULL;
+ u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
+ u64 length = blk_rq_bytes(rq);
+ enum obj_operation_type op_type;