+++ /dev/null
- /*
- * Copyright (C) 2010 The Chromium OS Authors <chromium-os-dev@chromium.org>
- *
- * Device-Mapper block hash tree interface.
- * See Documentation/device-mapper/dm-bht.txt for details.
- *
- * This file is released under the GPL.
- */
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <linux/bitops.h> /* for fls() */
-#include <linux/bug.h>
-#include <linux/cpumask.h> /* nr_cpu_ids */
-/* #define CONFIG_DM_DEBUG 1 */
-#include <linux/device-mapper.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/dm-bht.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mm_types.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h> /* k*alloc */
-#include <linux/string.h> /* memset */
-
-#define DM_MSG_PREFIX "dm bht"
-
-/* For sector formatting. */
-#if defined(_LP64) || defined(__LP64__) || __BITS_PER_LONG == 64
-#define __PRIS_PREFIX "z"
-#else
-#define __PRIS_PREFIX "ll"
-#endif
-#define PRIu64 __PRIS_PREFIX "u"
-
-
-/*-----------------------------------------------
- * Utilities
- *-----------------------------------------------*/
-
-static u8 from_hex(u8 ch)
-{
- if ((ch >= '0') && (ch <= '9'))
- return ch - '0';
- if ((ch >= 'a') && (ch <= 'f'))
- return ch - 'a' + 10;
- if ((ch >= 'A') && (ch <= 'F'))
- return ch - 'A' + 10;
- return -1;
-}
-
-/**
- * dm_bht_bin_to_hex - converts a binary stream to human-readable hex
- * @binary: a byte array of length @binary_len
- * @hex: a byte array of length @binary_len * 2 + 1
- */
-static void dm_bht_bin_to_hex(u8 *binary, u8 *hex, unsigned int binary_len)
-{
- while (binary_len-- > 0) {
- sprintf((char *__restrict__)hex, "%02hhx", (int)*binary);
- hex += 2;
- binary++;
- }
-}
-
-/**
- * dm_bht_hex_to_bin - converts a hex stream to binary
- * @binary: a byte array of length @binary_len
- * @hex: a byte array of length @binary_len * 2 + 1
- */
-static void dm_bht_hex_to_bin(u8 *binary, const u8 *hex,
- unsigned int binary_len)
-{
- while (binary_len-- > 0) {
- *binary = from_hex(*(hex++));
- *binary *= 16;
- *binary += from_hex(*(hex++));
- binary++;
- }
-}
-
-static void dm_bht_log_mismatch(struct dm_bht *bht, u8 *given, u8 *computed)
-{
- u8 given_hex[DM_BHT_MAX_DIGEST_SIZE * 2 + 1];
- u8 computed_hex[DM_BHT_MAX_DIGEST_SIZE * 2 + 1];
- dm_bht_bin_to_hex(given, given_hex, bht->digest_size);
- dm_bht_bin_to_hex(computed, computed_hex, bht->digest_size);
- DMERR_LIMIT("%s != %s", given_hex, computed_hex);
-}
-
-/* Used for turning verifiers into computers */
-typedef int (*dm_bht_compare_cb)(struct dm_bht *, u8 *, u8 *);
-
-/**
- * dm_bht_compute_hash: hashes a page of data
- */
-static int dm_bht_compute_hash(struct dm_bht *bht, struct page *pg,
- unsigned int offset, u8 *digest)
-{
- struct hash_desc *hash_desc = &bht->hash_desc[smp_processor_id()];
- struct scatterlist sg;
-
- sg_init_table(&sg, 1);
- sg_set_page(&sg, pg, PAGE_SIZE, offset);
- /* Note, this is synchronous. */
- if (crypto_hash_init(hash_desc)) {
- DMCRIT("failed to reinitialize crypto hash (proc:%d)",
- smp_processor_id());
- return -EINVAL;
- }
- if (crypto_hash_update(hash_desc, &sg, PAGE_SIZE)) {
- DMCRIT("crypto_hash_update failed");
- return -EINVAL;
- }
- if (bht->have_salt) {
- sg_set_buf(&sg, bht->salt, sizeof(bht->salt));
- if (crypto_hash_update(hash_desc, &sg, sizeof(bht->salt))) {
- DMCRIT("crypto_hash_update failed");
- return -EINVAL;
- }
- }
- if (crypto_hash_final(hash_desc, digest)) {
- DMCRIT("crypto_hash_final failed");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static __always_inline struct dm_bht_level *dm_bht_get_level(struct dm_bht *bht,
- int depth)
-{
- return &bht->levels[depth];
-}
-
-static __always_inline unsigned int dm_bht_get_level_shift(struct dm_bht *bht,
- int depth)
-{
- return (bht->depth - depth) * bht->node_count_shift;
-}
-
-/* For the given depth, this is the entry index. At depth+1 it is the node
- * index for depth.
- */
-static __always_inline unsigned int dm_bht_index_at_level(struct dm_bht *bht,
- int depth,
- unsigned int leaf)
-{
- return leaf >> dm_bht_get_level_shift(bht, depth);
-}
-
-static __always_inline u8 *dm_bht_node(struct dm_bht *bht,
- struct dm_bht_entry *entry,
- unsigned int node_index)
-{
- return &entry->nodes[node_index * bht->digest_size];
-}
-
-static inline struct dm_bht_entry *dm_bht_get_entry(struct dm_bht *bht,
- int depth,
- unsigned int block)
-{
- unsigned int index = dm_bht_index_at_level(bht, depth, block);
- struct dm_bht_level *level = dm_bht_get_level(bht, depth);
-
- BUG_ON(index >= level->count);
-
- return &level->entries[index];
-}
-
-static inline u8 *dm_bht_get_node(struct dm_bht *bht,
- struct dm_bht_entry *entry,
- int depth,
- unsigned int block)
-{
- unsigned int index = dm_bht_index_at_level(bht, depth, block);
-
- return dm_bht_node(bht, entry, index % bht->node_count);
-}
-
-
-/*-----------------------------------------------
- * Implementation functions
- *-----------------------------------------------*/
-
-static int dm_bht_initialize_entries(struct dm_bht *bht);
-
-static int dm_bht_read_callback_stub(void *ctx, sector_t start, u8 *dst,
- sector_t count,
- struct dm_bht_entry *entry);
-static int dm_bht_write_callback_stub(void *ctx, sector_t start,
- u8 *dst, sector_t count,
- struct dm_bht_entry *entry);
-
-/**
- * dm_bht_create - prepares @bht for us
- * @bht: pointer to a dm_bht_create()d bht
- * @depth: tree depth without the root; including block hashes
- * @block_count:the number of block hashes / tree leaves
- * @alg_name: crypto hash algorithm name
- *
- * Returns 0 on success.
- *
- * Callers can offset into devices by storing the data in the io callbacks.
- * TODO(wad) bust up into smaller helpers
- */
-int dm_bht_create(struct dm_bht *bht, unsigned int block_count,
- const char *alg_name)
-{
- int status = 0;
- int cpu = 0;
-
- bht->have_salt = false;
-
- /* Setup the hash first. Its length determines much of the bht layout */
- for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
- bht->hash_desc[cpu].tfm = crypto_alloc_hash(alg_name, 0, 0);
- if (IS_ERR(bht->hash_desc[cpu].tfm)) {
- DMERR("failed to allocate crypto hash '%s'", alg_name);
- status = -ENOMEM;
- bht->hash_desc[cpu].tfm = NULL;
- goto bad_hash_alg;
- }
- }
- bht->digest_size = crypto_hash_digestsize(bht->hash_desc[0].tfm);
- /* We expect to be able to pack >=2 hashes into a page */
- if (PAGE_SIZE / bht->digest_size < 2) {
- DMERR("too few hashes fit in a page");
- status = -EINVAL;
- goto bad_digest_len;
- }
-
- if (bht->digest_size > DM_BHT_MAX_DIGEST_SIZE) {
- DMERR("DM_BHT_MAX_DIGEST_SIZE too small for chosen digest");
- status = -EINVAL;
- goto bad_digest_len;
- }
-
- /* Configure the tree */
- bht->block_count = block_count;
- DMDEBUG("Setting block_count %u", block_count);
- if (block_count == 0) {
- DMERR("block_count must be non-zero");
- status = -EINVAL;
- goto bad_block_count;
- }
-
- /* Each dm_bht_entry->nodes is one page. The node code tracks
- * how many nodes fit into one entry where a node is a single
- * hash (message digest).
- */
- bht->node_count_shift = fls(PAGE_SIZE / bht->digest_size) - 1;
- /* Round down to the nearest power of two. This makes indexing
- * into the tree much less painful.
- */
- bht->node_count = 1 << bht->node_count_shift;
-
- /* This is unlikely to happen, but with 64k pages, who knows. */
- if (bht->node_count > UINT_MAX / bht->digest_size) {
- DMERR("node_count * hash_len exceeds UINT_MAX!");
- status = -EINVAL;
- goto bad_node_count;
- }
-
- bht->depth = DIV_ROUND_UP(fls(block_count - 1), bht->node_count_shift);
- DMDEBUG("Setting depth to %d.", bht->depth);
-
- /* Ensure that we can safely shift by this value. */
- if (bht->depth * bht->node_count_shift >= sizeof(unsigned int) * 8) {
- DMERR("specified depth and node_count_shift is too large");
- status = -EINVAL;
- goto bad_node_count;
- }
-
- /* Allocate levels. Each level of the tree may have an arbitrary number
- * of dm_bht_entry structs. Each entry contains node_count nodes.
- * Each node in the tree is a cryptographic digest of either node_count
- * nodes on the subsequent level or of a specific block on disk.
- */
- bht->levels = (struct dm_bht_level *)
- kcalloc(bht->depth,
- sizeof(struct dm_bht_level), GFP_KERNEL);
- if (!bht->levels) {
- DMERR("failed to allocate tree levels");
- status = -ENOMEM;
- goto bad_level_alloc;
- }
-
- /* Setup callback stubs */
- bht->read_cb = &dm_bht_read_callback_stub;
- bht->write_cb = &dm_bht_write_callback_stub;
-
- status = dm_bht_initialize_entries(bht);
- if (status)
- goto bad_entries_alloc;
-
- /* We compute depth such that there is only be 1 block at level 0. */
- BUG_ON(bht->levels[0].count != 1);
-
- return 0;
-
-bad_entries_alloc:
- while (bht->depth-- > 0)
- kfree(bht->levels[bht->depth].entries);
- kfree(bht->levels);
-bad_node_count:
-bad_level_alloc:
-bad_block_count:
-bad_digest_len:
-bad_hash_alg:
- for (cpu = 0; cpu < nr_cpu_ids; ++cpu)
- if (bht->hash_desc[cpu].tfm)
- crypto_free_hash(bht->hash_desc[cpu].tfm);
- return status;
-}
-EXPORT_SYMBOL(dm_bht_create);
-
-static int dm_bht_initialize_entries(struct dm_bht *bht)
-{
- /* The last_index represents the index into the last
- * block digest that will be stored in the tree. By walking the
- * tree with that index, it is possible to compute the total number
- * of entries needed at each level in the tree.
- *
- * Since each entry will contain up to |node_count| nodes of the tree,
- * it is possible that the last index may not be at the end of a given
- * entry->nodes. In that case, it is assumed the value is padded.
- *
- * Note, we treat both the tree root (1 hash) and the tree leaves
- * independently from the bht data structures. Logically, the root is
- * depth=-1 and the block layer level is depth=bht->depth
- */
- unsigned int last_index = ALIGN(bht->block_count, bht->node_count) - 1;
- unsigned int total_entries = 0;
- struct dm_bht_level *level = NULL;
- int depth;
-
- /* check that the largest level->count can't result in an int overflow
- * on allocation or sector calculation.
- */
- if (((last_index >> bht->node_count_shift) + 1) >
- UINT_MAX / max((unsigned int)sizeof(struct dm_bht_entry),
- (unsigned int)to_sector(PAGE_SIZE))) {
- DMCRIT("required entries %u is too large",
- last_index + 1);
- return -EINVAL;
- }
-
- /* Track the current sector location for each level so we don't have to
- * compute it during traversals.
- */
- bht->sectors = 0;
- for (depth = 0; depth < bht->depth; ++depth) {
- level = dm_bht_get_level(bht, depth);
- level->count = dm_bht_index_at_level(bht, depth,
- last_index) + 1;
- DMDEBUG("depth: %d entries: %u", depth, level->count);
- /* TODO(wad) consider the case where the data stored for each
- * level is done with contiguous pages (instead of using
- * entry->nodes) and the level just contains two bitmaps:
- * (a) which pages have been loaded from disk
- * (b) which specific nodes have been verified.
- */
- level->entries = (struct dm_bht_entry *)
- kcalloc(level->count,
- sizeof(struct dm_bht_entry),
- GFP_KERNEL);
- if (!level->entries) {
- DMERR("failed to allocate entries for depth %d",
- bht->depth);
- /* let the caller clean up the mess */
- return -ENOMEM;
- }
- total_entries += level->count;
- level->sector = bht->sectors;
- /* number of sectors per entry * entries at this level */
- bht->sectors += level->count * to_sector(PAGE_SIZE);
- /* not ideal, but since unsigned overflow behavior is defined */
- if (bht->sectors < level->sector) {
- DMCRIT("level sector calculation overflowed");
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static int dm_bht_read_callback_stub(void *ctx, sector_t start, u8 *dst,
- sector_t count, struct dm_bht_entry *entry)
-{
- DMCRIT("dm_bht_read_callback_stub called!");
- dm_bht_read_completed(entry, -EIO);
- return -EIO;
-}
-
-static int dm_bht_write_callback_stub(void *ctx, sector_t start,
- u8 *dst, sector_t count,
- struct dm_bht_entry *entry)
-{
- DMCRIT("dm_bht_write_callback_stub called!");
- dm_bht_write_completed(entry, -EIO);
- return -EIO;
-}
-
-/**
- * dm_bht_read_completed
- * @entry: pointer to the entry that's been loaded
- * @status: I/O status. Non-zero is failure.
- * MUST always be called after a read_cb completes.
- */
-void dm_bht_read_completed(struct dm_bht_entry *entry, int status)
-{
- if (status) {
- /* TODO(wad) add retry support */
- DMCRIT("an I/O error occurred while reading entry");
- atomic_set(&entry->state, DM_BHT_ENTRY_ERROR_IO);
- /* entry->nodes will be freed later */
- return;
- }
- BUG_ON(atomic_read(&entry->state) != DM_BHT_ENTRY_PENDING);
- atomic_set(&entry->state, DM_BHT_ENTRY_READY);
-}
-EXPORT_SYMBOL(dm_bht_read_completed);
-
-/**
- * dm_bht_write_completed
- * @entry: pointer to the entry that's been loaded
- * @status: I/O status. Non-zero is failure.
- * Should be called after a write_cb completes. Currently only catches
- * errors which more writers don't care about.
- */
-void dm_bht_write_completed(struct dm_bht_entry *entry, int status)
-{
- if (status) {
- DMCRIT("an I/O error occurred while writing entry");
- atomic_set(&entry->state, DM_BHT_ENTRY_ERROR_IO);
- /* entry->nodes will be freed later */
- return;
- }
-}
-EXPORT_SYMBOL(dm_bht_write_completed);
-
-/* dm_bht_verify_path
- * Verifies the path. Returns 0 on ok.
- */
-static int dm_bht_verify_path(struct dm_bht *bht, unsigned int block,
- struct page *pg, unsigned int offset)
-{
- int depth = bht->depth;
- u8 digest[DM_BHT_MAX_DIGEST_SIZE];
- struct dm_bht_entry *entry;
- u8 *node;
- int state;
-
- do {
- /* Need to check that the hash of the current block is accurate
- * in its parent.
- */
- entry = dm_bht_get_entry(bht, depth - 1, block);
- state = atomic_read(&entry->state);
- /* This call is only safe if all nodes along the path
- * are already populated (i.e. READY) via dm_bht_populate.
- */
- BUG_ON(state < DM_BHT_ENTRY_READY);
- node = dm_bht_get_node(bht, entry, depth, block);
-
- if (dm_bht_compute_hash(bht, pg, offset, digest) ||
- memcmp(digest, node, bht->digest_size))
- goto mismatch;
-
- /* Keep the containing block of hashes to be verified in the
- * next pass.
- */
- pg = virt_to_page(entry->nodes);
- offset = 0;
- } while (--depth > 0 && state != DM_BHT_ENTRY_VERIFIED);
-
- if (depth == 0 && state != DM_BHT_ENTRY_VERIFIED) {
- if (dm_bht_compute_hash(bht, pg, offset, digest) ||
- memcmp(digest, bht->root_digest, bht->digest_size))
- goto mismatch;
- atomic_set(&entry->state, DM_BHT_ENTRY_VERIFIED);
- }
-
- /* Mark path to leaf as verified. */
- for (depth++; depth < bht->depth; depth++) {
- entry = dm_bht_get_entry(bht, depth, block);
- /* At this point, entry can only be in VERIFIED or READY state.
- * So it is safe to use atomic_set instead of atomic_cmpxchg.
- */
- atomic_set(&entry->state, DM_BHT_ENTRY_VERIFIED);
- }
-
- DMDEBUG("verify_path: node %u is verified to root", block);
- return 0;
-
-mismatch:
- DMERR_LIMIT("verify_path: failed to verify hash (d=%d,bi=%u)",
- depth, block);
- dm_bht_log_mismatch(bht, node, digest);
- return DM_BHT_ENTRY_ERROR_MISMATCH;
-}
-
-/**
- * dm_bht_store_block - sets a given block's hash in the tree
- * @bht: pointer to a dm_bht_create()d bht
- * @block: numeric index of the block in the tree
- * @digest: array of u8s containing the digest of length @bht->digest_size
- *
- * Returns 0 on success, >0 when data is pending, and <0 when a IO or other
- * error has occurred.
- *
- * If the containing entry in the tree is unallocated, it will allocate memory
- * and mark the entry as ready. All other block entries will be 0s. This
- * function is not safe for simultaneous use when verifying data and should not
- * be used if the @bht is being accessed by any other functions in any other
- * threads/processes.
- *
- * It is expected that virt_to_page will work on |block_data|.
- */
-int dm_bht_store_block(struct dm_bht *bht, unsigned int block,
- u8 *block_data)
-{
- int depth;
- unsigned int index;
- unsigned int node_index;
- struct dm_bht_entry *entry;
- struct dm_bht_level *level;
- int state;
- struct page *node_page = NULL;
-
- /* Look at the last level of nodes above the leaves (data blocks) */
- depth = bht->depth - 1;
-
- /* Index into the level */
- level = dm_bht_get_level(bht, depth);
- index = dm_bht_index_at_level(bht, depth, block);
- /* Grab the node index into the current entry by getting the
- * index at the leaf-level.
- */
- node_index = dm_bht_index_at_level(bht, depth + 1, block) %
- bht->node_count;
- entry = &level->entries[index];
-
- DMDEBUG("Storing block %u in d=%d,ei=%u,ni=%u,s=%d",
- block, depth, index, node_index,
- atomic_read(&entry->state));
-
- state = atomic_cmpxchg(&entry->state,
- DM_BHT_ENTRY_UNALLOCATED,
- DM_BHT_ENTRY_PENDING);
- /* !!! Note. It is up to the users of the update interface to
- * ensure the entry data is fully populated prior to use.
- * The number of updated entries is NOT tracked.
- */
- if (state == DM_BHT_ENTRY_UNALLOCATED) {
- node_page = alloc_page(GFP_KERNEL);
- if (!node_page) {
- atomic_set(&entry->state, DM_BHT_ENTRY_ERROR);
- return -ENOMEM;
- }
- entry->nodes = page_address(node_page);
- memset(entry->nodes, 0, PAGE_SIZE);
- /* TODO(wad) could expose this to the caller to that they
- * can transition from unallocated to ready manually.
- */
- atomic_set(&entry->state, DM_BHT_ENTRY_READY);
- } else if (state <= DM_BHT_ENTRY_ERROR) {
- DMCRIT("leaf entry for block %u is invalid",
- block);
- return state;
- } else if (state == DM_BHT_ENTRY_PENDING) {
- DMERR("leaf data is pending for block %u", block);
- return 1;
- }
-
- dm_bht_compute_hash(bht, virt_to_page(block_data), 0,
- dm_bht_node(bht, entry, node_index));
- return 0;
-}
-EXPORT_SYMBOL(dm_bht_store_block);
-
-/**
- * dm_bht_zeroread_callback - read callback which always returns 0s
- * @ctx: ignored
- * @start: ignored
- * @data: buffer to write 0s to
- * @count: number of sectors worth of data to write
- * @complete_ctx: opaque context for @completed
- * @completed: callback to confirm end of data read
- *
- * Always returns 0.
- *
- * Meant for use by dm_compute() callers. It allows dm_populate to
- * be used to pre-fill a tree with zeroed out entry nodes.
- */
-int dm_bht_zeroread_callback(void *ctx, sector_t start, u8 *dst,
- sector_t count, struct dm_bht_entry *entry)
-{
- memset(dst, 0, to_bytes(count));
- dm_bht_read_completed(entry, 0);
- return 0;
-}
-EXPORT_SYMBOL(dm_bht_zeroread_callback);
-
-/**
- * dm_bht_compute - computes and updates all non-block-level hashes in a tree
- * @bht: pointer to a dm_bht_create()d bht
- * @read_cb_ctx:opaque read_cb context for all I/O on this call
- *
- * Returns 0 on success, >0 when data is pending, and <0 when a IO or other
- * error has occurred.
- *
- * Walks the tree and computes the hashes at each level from the
- * hashes below. This can only be called once per tree creation
- * since it will mark entries verified. Expects dm_bht_populate() to
- * correctly populate the tree from the read_callback_stub.
- *
- * This function should not be used when verifying the same tree and
- * should not be used with multiple simultaneous operators on @bht.
- */
-int dm_bht_compute(struct dm_bht *bht, void *read_cb_ctx)
-{
- int depth, r = 0;
-
- for (depth = bht->depth - 2; depth >= 0; depth--) {
- struct dm_bht_level *level = dm_bht_get_level(bht, depth);
- struct dm_bht_level *child_level = level + 1;
- struct dm_bht_entry *entry = level->entries;
- struct dm_bht_entry *child = child_level->entries;
- unsigned int i, j;
-
- for (i = 0; i < level->count; i++, entry++) {
- unsigned int count = bht->node_count;
- struct page *pg;
-
- pg = alloc_page(GFP_NOIO);
- if (!pg) {
- DMCRIT("an error occurred while reading entry");
- goto out;
- }
-
- entry->nodes = page_address(pg);
- memset(entry->nodes, 0, PAGE_SIZE);
- atomic_set(&entry->state, DM_BHT_ENTRY_READY);
-
- if (i == (level->count - 1))
- count = child_level->count % bht->node_count;
- if (count == 0)
- count = bht->node_count;
- for (j = 0; j < count; j++, child++) {
- struct page *pg = virt_to_page(child->nodes);
- u8 *digest = dm_bht_node(bht, entry, j);
-
- r = dm_bht_compute_hash(bht, pg, 0, digest);
- if (r) {
- DMERR("Failed to update (d=%d,i=%u)",
- depth, i);
- goto out;
- }
- }
- }
- }
- r = dm_bht_compute_hash(bht,
- virt_to_page(bht->levels[0].entries->nodes),
- 0, bht->root_digest);
- if (r)
- DMERR("Failed to update root hash");
-
-out:
- return r;
-}
-EXPORT_SYMBOL(dm_bht_compute);
-
-/**
- * dm_bht_sync - writes the tree in memory to disk
- * @bht: pointer to a dm_bht_create()d bht
- * @write_ctx: callback context for writes issued
- *
- * Since all entry nodes are PAGE_SIZE, the data will be pre-aligned and
- * padded.
- */
-int dm_bht_sync(struct dm_bht *bht, void *write_cb_ctx)
-{
- int depth;
- int ret = 0;
- int state;
- sector_t sector;
- struct dm_bht_level *level;
- struct dm_bht_entry *entry;
- struct dm_bht_entry *entry_end;
-
- for (depth = 0; depth < bht->depth; ++depth) {
- level = dm_bht_get_level(bht, depth);
- entry_end = level->entries + level->count;
- sector = level->sector;
- for (entry = level->entries; entry < entry_end; ++entry) {
- state = atomic_read(&entry->state);
- if (state <= DM_BHT_ENTRY_PENDING) {
- DMERR("At depth %d, entry %lu is not ready",
- depth,
- (unsigned long)(entry - level->entries));
- return state;
- }
- ret = bht->write_cb(write_cb_ctx,
- sector,
- entry->nodes,
- to_sector(PAGE_SIZE),
- entry);
- if (ret) {
- DMCRIT("an error occurred writing entry %lu",
- (unsigned long)(entry - level->entries));
- return ret;
- }
- sector += to_sector(PAGE_SIZE);
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL(dm_bht_sync);
-
-/**
- * dm_bht_is_populated - check that entries from disk needed to verify a given
- * block are all ready
- * @bht: pointer to a dm_bht_create()d bht
- * @block: specific block data is expected from
- *
- * Callers may wish to call dm_bht_is_populated() when checking an io
- * for which entries were already pending.
- */
-bool dm_bht_is_populated(struct dm_bht *bht, unsigned int block)
-{
- int depth;
-
- for (depth = bht->depth - 1; depth >= 0; depth--) {
- struct dm_bht_entry *entry = dm_bht_get_entry(bht, depth,
- block);
- if (atomic_read(&entry->state) < DM_BHT_ENTRY_READY)
- return false;
- }
-
- return true;
-}
-EXPORT_SYMBOL(dm_bht_is_populated);
-
-/**
- * dm_bht_populate - reads entries from disk needed to verify a given block
- * @bht: pointer to a dm_bht_create()d bht
- * @ctx: context used for all read_cb calls on this request
- * @block: specific block data is expected from
- *
- * Returns negative value on error. Returns 0 on success.
- */
-int dm_bht_populate(struct dm_bht *bht, void *ctx,
- unsigned int block)
-{
- int depth;
- int state = 0;
-
- BUG_ON(block >= bht->block_count);
-
- DMDEBUG("dm_bht_populate(%u)", block);
-
- for (depth = bht->depth - 1; depth >= 0; --depth) {
- struct dm_bht_level *level;
- struct dm_bht_entry *entry;
- unsigned int index;
- struct page *pg;
-
- entry = dm_bht_get_entry(bht, depth, block);
- state = atomic_cmpxchg(&entry->state,
- DM_BHT_ENTRY_UNALLOCATED,
- DM_BHT_ENTRY_PENDING);
-
- if (state == DM_BHT_ENTRY_VERIFIED)
- break;
- if (state <= DM_BHT_ENTRY_ERROR)
- goto error_state;
- if (state != DM_BHT_ENTRY_UNALLOCATED)
- continue;
-
- /* Current entry is claimed for allocation and loading */
- pg = alloc_page(GFP_NOIO);
- if (!pg)
- goto nomem;
-
- /* dm-bht guarantees page-aligned memory for callbacks. */
- entry->nodes = page_address(pg);
-
- /* TODO(wad) error check callback here too */
-
- level = &bht->levels[depth];
- index = dm_bht_index_at_level(bht, depth, block);
- bht->read_cb(ctx, level->sector + to_sector(index * PAGE_SIZE),
- entry->nodes, to_sector(PAGE_SIZE), entry);
- }
-
- return 0;
-
-error_state:
- DMCRIT("block %u at depth %d is in an error state", block, depth);
- return state;
-
-nomem:
- DMCRIT("failed to allocate memory for entry->nodes");
- return -ENOMEM;
-}
-EXPORT_SYMBOL(dm_bht_populate);
-
-
-/**
- * dm_bht_verify_block - checks that all nodes in the path for @block are valid
- * @bht: pointer to a dm_bht_create()d bht
- * @block: specific block data is expected from
- * @pg: page holding the block data
- * @offset: offset into the page
- *
- * Returns 0 on success, 1 on missing data, and a negative error
- * code on verification failure. All supporting functions called
- * should return similarly.
- */
-int dm_bht_verify_block(struct dm_bht *bht, unsigned int block,
- struct page *pg, unsigned int offset)
-{
- BUG_ON(offset != 0);
-
- return dm_bht_verify_path(bht, block, pg, offset);
-}
-EXPORT_SYMBOL(dm_bht_verify_block);
-
-/**
- * dm_bht_destroy - cleans up all memory used by @bht
- * @bht: pointer to a dm_bht_create()d bht
- *
- * Returns 0 on success. Does not free @bht itself.
- */
-int dm_bht_destroy(struct dm_bht *bht)
-{
- int depth;
- int cpu = 0;
-
- depth = bht->depth;
- while (depth-- != 0) {
- struct dm_bht_entry *entry = bht->levels[depth].entries;
- struct dm_bht_entry *entry_end = entry +
- bht->levels[depth].count;
- int state = 0;
- for (; entry < entry_end; ++entry) {
- state = atomic_read(&entry->state);
- switch (state) {
- /* At present, no other states free memory,
- * but that will change.
- */
- case DM_BHT_ENTRY_UNALLOCATED:
- /* Allocated with improper state */
- BUG_ON(entry->nodes);
- continue;
- default:
- BUG_ON(!entry->nodes);
- __free_page(virt_to_page(entry->nodes));
- break;
- }
- }
- kfree(bht->levels[depth].entries);
- bht->levels[depth].entries = NULL;
- }
- kfree(bht->levels);
- for (cpu = 0; cpu < nr_cpu_ids; ++cpu)
- if (bht->hash_desc[cpu].tfm)
- crypto_free_hash(bht->hash_desc[cpu].tfm);
- return 0;
-}
-EXPORT_SYMBOL(dm_bht_destroy);
-
-/*-----------------------------------------------
- * Accessors
- *-----------------------------------------------*/
-
-/**
- * dm_bht_sectors - return the sectors required on disk
- * @bht: pointer to a dm_bht_create()d bht
- */
-sector_t dm_bht_sectors(const struct dm_bht *bht)
-{
- return bht->sectors;
-}
-EXPORT_SYMBOL(dm_bht_sectors);
-
-/**
- * dm_bht_set_read_cb - set read callback
- * @bht: pointer to a dm_bht_create()d bht
- * @read_cb: callback function used for all read requests by @bht
- */
-void dm_bht_set_read_cb(struct dm_bht *bht, dm_bht_callback read_cb)
-{
- bht->read_cb = read_cb;
-}
-EXPORT_SYMBOL(dm_bht_set_read_cb);
-
-/**
- * dm_bht_set_write_cb - set write callback
- * @bht: pointer to a dm_bht_create()d bht
- * @write_cb: callback function used for all write requests by @bht
- */
-void dm_bht_set_write_cb(struct dm_bht *bht, dm_bht_callback write_cb)
-{
- bht->write_cb = write_cb;
-}
-EXPORT_SYMBOL(dm_bht_set_write_cb);
-
-/**
- * dm_bht_set_root_hexdigest - sets an unverified root digest hash from hex
- * @bht: pointer to a dm_bht_create()d bht
- * @hexdigest: array of u8s containing the new digest in binary
- * Returns non-zero on error. hexdigest should be NUL terminated.
- */
-int dm_bht_set_root_hexdigest(struct dm_bht *bht, const u8 *hexdigest)
-{
- /* Make sure we have at least the bytes expected */
- if (strnlen((char *)hexdigest, bht->digest_size * 2) !=
- bht->digest_size * 2) {
- DMERR("root digest length does not match hash algorithm");
- return -1;
- }
- dm_bht_hex_to_bin(bht->root_digest, hexdigest, bht->digest_size);
-#ifdef CONFIG_DM_DEBUG
- DMINFO("Set root digest to %s. Parsed as -> ", hexdigest);
- dm_bht_log_mismatch(bht, bht->root_digest, bht->root_digest);
-#endif
- return 0;
-}
-EXPORT_SYMBOL(dm_bht_set_root_hexdigest);
-
-/**
- * dm_bht_root_hexdigest - returns root digest in hex
- * @bht: pointer to a dm_bht_create()d bht
- * @hexdigest: u8 array of size @available
- * @available: must be bht->digest_size * 2 + 1
- */
-int dm_bht_root_hexdigest(struct dm_bht *bht, u8 *hexdigest, int available)
-{
- if (available < 0 ||
- ((unsigned int) available) < bht->digest_size * 2 + 1) {
- DMERR("hexdigest has too few bytes available");
- return -EINVAL;
- }
- dm_bht_bin_to_hex(bht->root_digest, hexdigest, bht->digest_size);
- return 0;
-}
-EXPORT_SYMBOL(dm_bht_root_hexdigest);
-
-/**
- * dm_bht_set_salt - sets the salt used, in hex
- * @bht: pointer to a dm_bht_create()d bht
- * @hexsalt: salt string, as hex; will be zero-padded or truncated to
- * DM_BHT_SALT_SIZE * 2 hex digits.
- */
-void dm_bht_set_salt(struct dm_bht *bht, const char *hexsalt)
-{
- size_t saltlen = min(strlen(hexsalt) / 2, sizeof(bht->salt));
- bht->have_salt = true;
- memset(bht->salt, 0, sizeof(bht->salt));
- dm_bht_hex_to_bin(bht->salt, (const u8 *)hexsalt, saltlen);
-}
-
-/**
- * dm_bht_salt - returns the salt used, in hex
- * @bht: pointer to a dm_bht_create()d bht
- * @hexsalt: buffer to put salt into, of length DM_BHT_SALT_SIZE * 2 + 1.
- */
-int dm_bht_salt(struct dm_bht *bht, char *hexsalt)
-{
- if (!bht->have_salt)
- return -EINVAL;
- dm_bht_bin_to_hex(bht->salt, (u8 *)hexsalt, sizeof(bht->salt));
- return 0;
-}
/*
- * Originally based on dm-crypt.c,
- * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
- * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org>
- * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
- * Copyright (C) 2010 The Chromium OS Authors <chromium-os-dev@chromium.org>
- * All Rights Reserved.
+ * Copyright (C) 2012 Red Hat, Inc.
*
- * This file is released under the GPL.
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
*
- * Implements a verifying transparent block device.
- * See Documentation/device-mapper/dm-verity.txt
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ *
+ * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
+ * default prefetch value. Data are read in "prefetch_cluster" chunks from the
+ * hash device. Setting this greatly improves performance when data and hash
+ * are on the same disk on different partitions on devices with poor random
+ * access behavior.
*/
-#include <linux/async.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/genhd.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/mempool.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <asm/atomic.h>
-#include <asm/page.h>
-
-/* #define CONFIG_DM_DEBUG 1 */
-#define CONFIG_DM_VERITY_TRACE 1
-#include <linux/device-mapper.h>
-#include <linux/dm-bht.h>
-
-#include "dm-verity.h"
-#include "md.h"
-
-#define DM_MSG_PREFIX "verity"
-/* Supports up to 512-bit digests */
-#define VERITY_MAX_DIGEST_SIZE 64
+#include "dm-bufio.h"
-/* TODO(wad) make both of these report the error line/file to a
- * verity_bug function.
- */
-#define VERITY_BUG(msg...) BUG()
-#define VERITY_BUG_ON(cond, msg...) BUG_ON(cond)
-
-/* Helper for printing sector_t */
-#define ULL(x) ((unsigned long long)(x))
-
-/* IOS represent min of dm_verity_ios in a pool, but we also use it to
- * preallocate biosets (MIN_IOS * 2):
- * 1. We need to clone the entire bioset, including bio_vecs, before passing
- * them to the underlying block layer since it may alter the values.
- * 2. We need to pad out biosets that are not block aligned.
- * 3. We need to be able to create biosets while loading in hashes.
- * This will need more tweaking for specific workload expectations.
- */
-#define MIN_IOS 32
-/* During io_bht_read, we will spawn _many_ bios for a single I/O early on, but
- * once the tree is populated, we will only need MIN_IOS at most to be able to
- * pad out the request. We will also need space for the padding biovecs which
- * is at most 2, less than one page per side.
- */
-#define MIN_BIOS (MIN_IOS * 2)
-
-/* MUST be true: SECTOR_SHIFT <= VERITY_BLOCK_SHIFT <= PAGE_SHIFT */
-#define VERITY_BLOCK_SIZE 4096
-#define VERITY_BLOCK_SHIFT 12
+#include <linux/module.h>
+#include <linux/device-mapper.h>
+#include <crypto/hash.h>
-/* Support additional tracing of requests */
-#ifdef CONFIG_DM_VERITY_TRACE
-#define VERITY_TRACE(param, fmt, args...) { \
- if (param) \
- DMINFO(fmt, ## args); \
-}
-static int request_trace;
-module_param(request_trace, int, 0644);
-MODULE_PARM_DESC(request_trace, "Enable request tracing to DMINFO");
-
-static int alloc_trace;
-module_param(alloc_trace, int, 0644);
-MODULE_PARM_DESC(alloc_trace, "Enable allocation tracing to DMINFO");
-#else
-#define VERITY_TRACE(...)
-#endif
-
-#define REQTRACE(fmt, args...) VERITY_TRACE(request_trace, "req: " fmt, ## args)
-#define ALLOCTRACE(fmt, args...) \
- VERITY_TRACE(alloc_trace, "alloc: " fmt, ## args)
-
-/* Provide a lightweight means of specifying the global default for
- * error behavior: eio, reboot, or none
- * Legacy support for 0 = eio, 1 = reboot/panic, 2 = none, 3 = notify.
- * This is matched to the enum in dm-verity.h.
- */
-static const char *allowed_error_behaviors[] = { "eio", "panic", "none",
- "notify", NULL };
-static char *error_behavior = "eio";
-module_param(error_behavior, charp, 0644);
-MODULE_PARM_DESC(error_behavior, "Behavior on error "
- "(eio, panic, none, notify)");
-
-/* Controls whether verity_get_device will wait forever for a device. */
-static int dev_wait;
-module_param(dev_wait, int, 0444);
-MODULE_PARM_DESC(dev_wait, "Wait forever for a backing device");
-
-/* Used for tracking pending bios as well as for exporting information via
- * STATUSTYPE_INFO.
- */
-struct verity_stats {
- unsigned int io_queue; /* # pending I/O operations */
- unsigned int verify_queue; /* # pending verify operations */
- unsigned int average_requeues; /* not implemented */
+#define DM_MSG_PREFIX "verity"
- /*
- * Number of times a data block was ready but we didn't have the hash
- * blocks for it yet */
- unsigned long long total_requeues;
- unsigned long long total_requests; /* number of reads */
+#define DM_VERITY_IO_VEC_INLINE 16
+#define DM_VERITY_MEMPOOL_SIZE 4
+#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
- unsigned long long total_blocks; /* total blocks read */
- unsigned long long total_size; /* total blocks read */
+#define DM_VERITY_MAX_LEVELS 63
- unsigned long bht_requests; /* number of hash blocks read */
+static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
- /* number of reads for each block size (log2) */
- unsigned long io_by_block_size[sizeof(uint64_t) * 8];
- unsigned long long io_size_by_block_size[sizeof(uint64_t) * 8];
-};
+module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
-/* per-requested-bio private data */
-enum verity_io_flags {
- VERITY_IOFLAGS_CLONED = 0x1, /* original bio has been cloned */
+struct dm_verity {
+ struct dm_dev *data_dev;
+ struct dm_dev *hash_dev;
+ struct dm_target *ti;
+ struct dm_bufio_client *bufio;
+ char *alg_name;
+ struct crypto_shash *tfm;
+ u8 *root_digest; /* digest of the root block */
+ u8 *salt; /* salt: its size is salt_size */
+ unsigned salt_size;
+ sector_t data_start; /* data offset in 512-byte sectors */
+ sector_t hash_start; /* hash start in blocks */
+ sector_t data_blocks; /* the number of data blocks */
+ sector_t hash_blocks; /* the number of hash blocks */
+ unsigned char data_dev_block_bits; /* log2(data blocksize) */
+ unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
+ unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
+ unsigned char levels; /* the number of tree levels */
+ unsigned char version;
+ unsigned digest_size; /* digest size for the current hash algorithm */
+ unsigned shash_descsize;/* the size of temporary space for crypto */
+ int hash_failed; /* set to 1 if hash of any block failed */
+
+ mempool_t *io_mempool; /* mempool of struct dm_verity_io */
+ mempool_t *vec_mempool; /* mempool of bio vector */
+
+ struct workqueue_struct *verify_wq;
+
+ /* starting blocks for each tree level. 0 is the lowest level. */
+ sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
};
struct dm_verity_io {
- struct dm_target *target;
+ struct dm_verity *v;
struct bio *bio;
- struct delayed_work work;
- unsigned int flags;
- int error;
- atomic_t pending;
+ /* original values of bio->bi_end_io and bio->bi_private */
+ bio_end_io_t *orig_bi_end_io;
+ void *orig_bi_private;
- sector_t sector; /* converted to target sector */
- u64 block; /* aligned block index */
- u64 count; /* aligned count in blocks */
-};
+ sector_t block;
+ unsigned n_blocks;
-struct verity_config {
- struct dm_dev *dev;
- sector_t start;
- sector_t size;
+ /* saved bio vector */
+ struct bio_vec *io_vec;
+ unsigned io_vec_size;
- struct dm_dev *hash_dev;
- sector_t hash_start;
+ struct work_struct work;
- struct dm_bht bht;
+ /* A space for short vectors; longer vectors are allocated separately. */
+ struct bio_vec io_vec_inline[DM_VERITY_IO_VEC_INLINE];
- /* Pool required for io contexts */
- mempool_t *io_pool;
- /* Pool and bios required for making sure that backing device reads are
- * in PAGE_SIZE increments.
+ /*
+	 * Three variably-sized fields follow this struct:
+ *
+ * u8 hash_desc[v->shash_descsize];
+ * u8 real_digest[v->digest_size];
+ * u8 want_digest[v->digest_size];
+ *
+ * To access them use: io_hash_desc(), io_real_digest() and io_want_digest().
*/
- struct bio_set *bs;
-
- char hash_alg[CRYPTO_MAX_ALG_NAME];
-
- int error_behavior;
-
- struct verity_stats stats;
- const char *name; /* name for this config */
- struct dentry *debugfs_dir; /* debugfs dir for this config */
};
-static struct kmem_cache *_verity_io_pool;
-static struct workqueue_struct *kveritydq, *kverityd_ioq;
-static struct dentry *debugfs_root; /* top-level debugfs dir for verity */
-
-static void kverityd_verify(struct work_struct *work);
-static void kverityd_io(struct work_struct *work);
-static void kverityd_io_bht_populate(struct dm_verity_io *io);
-static void kverityd_io_bht_populate_end(struct bio *, int error);
-
-static BLOCKING_NOTIFIER_HEAD(verity_error_notifier);
-
-/*-----------------------------------------------
- * Statistic tracking functions
- *-----------------------------------------------*/
-
-void verity_stats_io_queue_inc(struct verity_config *vc)
+static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
{
- vc->stats.io_queue++;
+ return (struct shash_desc *)(io + 1);
}
-void verity_stats_verify_queue_inc(struct verity_config *vc)
+static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io)
{
- vc->stats.verify_queue++;
+ return (u8 *)(io + 1) + v->shash_descsize;
}
-void verity_stats_io_queue_dec(struct verity_config *vc)
+static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io)
{
- vc->stats.io_queue--;
+ return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
}
-void verity_stats_verify_queue_dec(struct verity_config *vc)
-{
- vc->stats.verify_queue--;
-}
+/*
+ * Auxiliary structure appended to each dm-bufio buffer. If the value
+ * hash_verified is nonzero, hash of the block has been verified.
+ *
+ * The variable hash_verified is set to 0 when allocating the buffer, then
+ * it can be changed to 1 and it is never reset to 0 again.
+ *
+ * There is no lock around this value, a race condition can at worst cause
+ * that multiple processes verify the hash of the same buffer simultaneously
+ * and write 1 to hash_verified simultaneously.
+ * This condition is harmless, so we don't need locking.
+ */
+struct buffer_aux {
+ int hash_verified;
+};
-void verity_stats_total_requeues_inc(struct verity_config *vc)
+/*
+ * Initialize struct buffer_aux for a freshly created buffer.
+ */
+static void dm_bufio_alloc_callback(struct dm_buffer *buf)
{
- vc->stats.total_requeues++;
-}
+ struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
-void verity_stats_total_requests_inc(struct verity_config *vc)
-{
- vc->stats.total_requests++;
+ aux->hash_verified = 0;
}
-void verity_stats_average_requeues(struct verity_config *vc, int requeues)
+/*
+ * Translate input sector number to the sector number on the target device.
+ */
+static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
{
- /* TODO(wad) */
+ return v->data_start + dm_target_offset(v->ti, bi_sector);
}
-/*-----------------------------------------------
- * Exported interfaces
- *-----------------------------------------------*/
-
-int dm_verity_register_error_notifier(struct notifier_block *nb)
+/*
+ * Return hash position of a specified block at a specified tree level
+ * (0 is the lowest level).
+ * The lowest "hash_per_block_bits"-bits of the result denote hash position
+ * inside a hash block. The remaining bits denote location of the hash block.
+ */
+static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
+ int level)
{
- return blocking_notifier_chain_register(&verity_error_notifier, nb);
+ return block >> (level * v->hash_per_block_bits);
}
-EXPORT_SYMBOL_GPL(dm_verity_register_error_notifier);
-int dm_verity_unregister_error_notifier(struct notifier_block *nb)
+static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
+ sector_t *hash_block, unsigned *offset)
{
- return blocking_notifier_chain_unregister(&verity_error_notifier, nb);
-}
-EXPORT_SYMBOL_GPL(dm_verity_unregister_error_notifier);
-
-/*-----------------------------------------------
- * Allocation and utility functions
- *-----------------------------------------------*/
+ sector_t position = verity_position_at_level(v, block, level);
+ unsigned idx;
-static void kverityd_src_io_read_end(struct bio *clone, int error);
+ *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits);
-/* Shared destructor for all internal bios */
-static void dm_verity_bio_destructor(struct bio *bio)
-{
- struct dm_verity_io *io = bio->bi_private;
- struct verity_config *vc = io->target->private;
- bio_free(bio, vc->bs);
-}
+ if (!offset)
+ return;
-struct bio *verity_alloc_bioset(struct verity_config *vc, gfp_t gfp_mask,
- int nr_iovecs)
-{
- return bio_alloc_bioset(gfp_mask, nr_iovecs, vc->bs);
+ idx = position & ((1 << v->hash_per_block_bits) - 1);
+ if (!v->version)
+ *offset = idx * v->digest_size;
+ else
+ *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
}
-static struct dm_verity_io *verity_io_alloc(struct dm_target *ti,
- struct bio *bio, sector_t sector)
+/*
+ * Verify hash of a metadata block pertaining to the specified data block
+ * ("block" argument) at a specified level ("level" argument).
+ *
+ * On successful return, io_want_digest(v, io) contains the hash value for
+ * a lower tree level or for the data block (if we're at the lowest level).
+ *
+ * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
+ * If "skip_unverified" is false, unverified buffer is hashed and verified
+ * against current value of io_want_digest(v, io).
+ */
+static int verity_verify_level(struct dm_verity_io *io, sector_t block,
+ int level, bool skip_unverified)
{
- struct verity_config *vc = ti->private;
- struct dm_verity_io *io;
-
- ALLOCTRACE("dm_verity_io for sector %llu", ULL(sector));
- io = mempool_alloc(vc->io_pool, GFP_NOIO);
- if (unlikely(!io))
- return NULL;
- io->flags = 0;
- io->target = ti;
- io->bio = bio;
- io->sector = sector;
- io->error = 0;
-
- /* Adjust the sector by the virtual starting sector */
- io->block = (to_bytes(sector)) >> VERITY_BLOCK_SHIFT;
- io->count = bio->bi_size >> VERITY_BLOCK_SHIFT;
-
- DMDEBUG("io_alloc for %llu blocks starting at %llu",
- ULL(io->count), ULL(io->block));
-
- atomic_set(&io->pending, 0);
+ struct dm_verity *v = io->v;
+ struct dm_buffer *buf;
+ struct buffer_aux *aux;
+ u8 *data;
+ int r;
+ sector_t hash_block;
+ unsigned offset;
- return io;
-}
+ verity_hash_at_level(v, block, level, &hash_block, &offset);
-static struct bio *verity_bio_clone(struct dm_verity_io *io)
-{
- struct verity_config *vc = io->target->private;
- struct bio *bio = io->bio;
- struct bio *clone = verity_alloc_bioset(vc, GFP_NOIO, bio->bi_max_vecs);
+ data = dm_bufio_read(v->bufio, hash_block, &buf);
+ if (unlikely(IS_ERR(data)))
+ return PTR_ERR(data);
- if (!clone)
- return NULL;
+ aux = dm_bufio_get_aux_data(buf);
- __bio_clone(clone, bio);
- clone->bi_private = io;
- clone->bi_end_io = kverityd_src_io_read_end;
- clone->bi_bdev = vc->dev->bdev;
- clone->bi_sector = vc->start + io->sector;
- clone->bi_destructor = dm_verity_bio_destructor;
+ if (!aux->hash_verified) {
+ struct shash_desc *desc;
+ u8 *result;
- return clone;
-}
+ if (skip_unverified) {
+ r = 1;
+ goto release_ret_r;
+ }
-/* If the request is not successful, this handler takes action.
- * TODO make this call a registered handler.
- */
-static void verity_error(struct verity_config *vc, struct dm_verity_io *io,
- int error)
-{
- const char *message;
- int error_behavior = DM_VERITY_ERROR_BEHAVIOR_PANIC;
- dev_t devt = 0;
- u64 block = ~0;
- int transient = 1;
- struct dm_verity_error_state error_state;
-
- if (vc) {
- devt = vc->dev->bdev->bd_dev;
- error_behavior = vc->error_behavior;
- }
+ desc = io_hash_desc(v, io);
+ desc->tfm = v->tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ r = crypto_shash_init(desc);
+ if (r < 0) {
+ DMERR("crypto_shash_init failed: %d", r);
+ goto release_ret_r;
+ }
- if (io) {
- io->error = -EIO;
- block = io->block;
- }
+ if (likely(v->version >= 1)) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ goto release_ret_r;
+ }
+ }
- switch (error) {
- case -ENOMEM:
- message = "out of memory";
- break;
- case -EBUSY:
- message = "pending data seen during verify";
- break;
- case -EFAULT:
- message = "crypto operation failure";
- break;
- case -EACCES:
- message = "integrity failure";
- /* Image is bad. */
- transient = 0;
- break;
- case -EPERM:
- message = "hash tree population failure";
- /* Should be dm-bht specific errors */
- transient = 0;
- break;
- case -EINVAL:
- message = "unexpected missing/invalid data";
- /* The device was configured incorrectly - fallback. */
- transient = 0;
- break;
- default:
- /* Other errors can be passed through as IO errors */
- message = "unknown or I/O error";
- return;
- }
+ r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ goto release_ret_r;
+ }
- DMERR_LIMIT("verification failure occurred: %s", message);
-
- if (error_behavior == DM_VERITY_ERROR_BEHAVIOR_NOTIFY) {
- error_state.code = error;
- error_state.transient = transient;
- error_state.block = block;
- error_state.message = message;
- error_state.dev_start = vc->start;
- error_state.dev_len = vc->size;
- error_state.dev = vc->dev->bdev;
- error_state.hash_dev_start = vc->hash_start;
- error_state.hash_dev_len = dm_bht_sectors(&vc->bht);
- error_state.hash_dev = vc->hash_dev->bdev;
-
- /* Set default fallthrough behavior. */
- error_state.behavior = DM_VERITY_ERROR_BEHAVIOR_PANIC;
- error_behavior = DM_VERITY_ERROR_BEHAVIOR_PANIC;
-
- if (!blocking_notifier_call_chain(
- &verity_error_notifier, transient, &error_state)) {
- error_behavior = error_state.behavior;
+ if (!v->version) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ goto release_ret_r;
+ }
}
- }
- switch (error_behavior) {
- case DM_VERITY_ERROR_BEHAVIOR_EIO:
- break;
- case DM_VERITY_ERROR_BEHAVIOR_NONE:
- if (error != -EIO && io)
- io->error = 0;
- break;
- default:
- goto do_panic;
+ result = io_real_digest(v, io);
+ r = crypto_shash_final(desc, result);
+ if (r < 0) {
+ DMERR("crypto_shash_final failed: %d", r);
+ goto release_ret_r;
+ }
+ if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
+ DMERR_LIMIT("metadata block %llu is corrupted",
+ (unsigned long long)hash_block);
+ v->hash_failed = 1;
+ r = -EIO;
+ goto release_ret_r;
+ } else
+ aux->hash_verified = 1;
}
- return;
-do_panic:
- panic("dm-verity failure: "
- "device:%u:%u error:%d block:%llu message:%s",
- MAJOR(devt), MINOR(devt), error, ULL(block), message);
-}
+ data += offset;
-/**
- * verity_parse_error_behavior - parse a behavior charp to the enum
- * @behavior: NUL-terminated char array
- *
- * Checks if the behavior is valid either as text or as an index digit
- * and returns the proper enum value or -1 on error.
- */
-static int verity_parse_error_behavior(const char *behavior)
-{
- const char **allowed = allowed_error_behaviors;
- char index = '0';
+ memcpy(io_want_digest(v, io), data, v->digest_size);
- for (; *allowed; allowed++, index++)
- if (!strcmp(*allowed, behavior) || behavior[0] == index)
- break;
+ dm_bufio_release(buf);
+ return 0;
- if (!*allowed)
- return -1;
+release_ret_r:
+ dm_bufio_release(buf);
- /* Convert to the integer index matching the enum. */
- return allowed - allowed_error_behaviors;
+ return r;
}
-
-/**
- * match_dev_by_uuid - callback for finding a partition using its uuid
- * @dev: device passed in by the caller
- * @data: opaque pointer to a uuid packed by part_pack_uuid().
- *
- * Returns 1 if the device matches, and 0 otherwise.
+/*
+ * Verify one "dm_verity_io" structure.
*/
-static int match_dev_by_uuid(struct device *dev, void *data)
+static int verity_verify_io(struct dm_verity_io *io)
{
- u8 *uuid = data;
- struct hd_struct *part = dev_to_part(dev);
+ struct dm_verity *v = io->v;
+ unsigned b;
+ int i;
+ unsigned vector = 0, offset = 0;
+
+ for (b = 0; b < io->n_blocks; b++) {
+ struct shash_desc *desc;
+ u8 *result;
+ int r;
+ unsigned todo;
+
+ if (likely(v->levels)) {
+ /*
+ * First, we try to get the requested hash for
+ * the current block. If the hash block itself is
+ * verified, zero is returned. If it isn't, this
+			 * function returns 1 and we fall back to whole
+ * chain verification.
+ */
+ int r = verity_verify_level(io, io->block + b, 0, true);
+ if (likely(!r))
+ goto test_block_hash;
+ if (r < 0)
+ return r;
+ }
- if (!part->info)
- goto no_match;
+ memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
- if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
- goto no_match;
+ for (i = v->levels - 1; i >= 0; i--) {
+ int r = verity_verify_level(io, io->block + b, i, false);
+ if (unlikely(r))
+ return r;
+ }
- return 1;
-no_match:
- return 0;
-}
+test_block_hash:
+ desc = io_hash_desc(v, io);
+ desc->tfm = v->tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ r = crypto_shash_init(desc);
+ if (r < 0) {
+ DMERR("crypto_shash_init failed: %d", r);
+ return r;
+ }
-/**
- * dm_get_device_by_uuid: claim a device using its UUID
- * @ti: current dm_target
- * @uuid_string: 36 byte UUID hex encoded
- * (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
- * @dev_start: offset in sectors passed to dm_get_device
- * @dev_len: length in sectors passed to dm_get_device
- * @dm_dev: dm_dev to populate
- *
- * Wraps dm_get_device allowing it to use a unique partition id to
- * find a given partition on any drive. This code is based on
- * printk_all_partitions in that it walks all of the register block devices.
- *
- * N.B., uuid_string is not checked for safety just strlen().
- */
-static int dm_get_device_by_uuid(struct dm_target *ti, const char *uuid_str,
- sector_t dev_start, sector_t dev_len,
- struct dm_dev **dm_dev)
-{
- struct device *dev = NULL;
- dev_t devt = 0;
- char devt_buf[BDEVT_SIZE];
- u8 uuid[16];
- size_t uuid_length = strlen(uuid_str);
-
- if (uuid_length < 36)
- goto bad_uuid;
- /* Pack the requested UUID in the expected format. */
- part_pack_uuid(uuid_str, uuid);
-
- dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
- if (!dev)
- goto found_nothing;
-
- devt = dev->devt;
- put_device(dev);
-
- /* The caller may specify +/-%u after the UUID if they want a partition
- * before or after the one identified.
- */
- if (uuid_length > 36) {
- unsigned int part_offset;
- char sign;
- unsigned minor = MINOR(devt);
- if (sscanf(uuid_str + 36, "%c%u", &sign, &part_offset) == 2) {
- if (sign == '+') {
- minor += part_offset;
- } else if (sign == '-') {
- minor -= part_offset;
- } else {
- DMWARN("Trailing characters after UUID: %s\n",
- uuid_str);
+ if (likely(v->version >= 1)) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ }
+
+ todo = 1 << v->data_dev_block_bits;
+ do {
+ struct bio_vec *bv;
+ u8 *page;
+ unsigned len;
+
+ BUG_ON(vector >= io->io_vec_size);
+ bv = &io->io_vec[vector];
+ page = kmap_atomic(bv->bv_page);
+ len = bv->bv_len - offset;
+ if (likely(len >= todo))
+ len = todo;
+ r = crypto_shash_update(desc,
+ page + bv->bv_offset + offset, len);
+ kunmap_atomic(page);
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ offset += len;
+ if (likely(offset == bv->bv_len)) {
+ offset = 0;
+ vector++;
}
- devt = MKDEV(MAJOR(devt), minor);
+ todo -= len;
+ } while (todo);
+
+ if (!v->version) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ }
+
+ result = io_real_digest(v, io);
+ r = crypto_shash_final(desc, result);
+ if (r < 0) {
+ DMERR("crypto_shash_final failed: %d", r);
+ return r;
+ }
+ if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
+ DMERR_LIMIT("data block %llu is corrupted",
+ (unsigned long long)(io->block + b));
+ v->hash_failed = 1;
+ return -EIO;
}
}
+ BUG_ON(vector != io->io_vec_size);
+ BUG_ON(offset);
- /* Construct the dev name to pass to dm_get_device. dm_get_device
- * doesn't support being passed a dev_t.
- */
- snprintf(devt_buf, sizeof(devt_buf), "%u:%u", MAJOR(devt), MINOR(devt));
-
- /* TODO(wad) to make this generic we could also pass in the mode. */
- if (!dm_get_device(ti, devt_buf, dm_table_get_mode(ti->table), dm_dev))
- return 0;
-
- ti->error = "Failed to acquire device";
- DMDEBUG("Failed to acquire discovered device %s", devt_buf);
- return -1;
-bad_uuid:
- ti->error = "Bad UUID";
- DMDEBUG("Supplied value '%s' is an invalid UUID", uuid_str);
- return -1;
-found_nothing:
- DMDEBUG("No matching partition for GUID: %s", uuid_str);
- ti->error = "No matching GUID";
- return -1;
+ return 0;
}
-static int verity_get_device(struct dm_target *ti, const char *devname,
- sector_t dev_start, sector_t dev_len,
- struct dm_dev **dm_dev)
+/*
+ * End one "io" structure with a given error.
+ */
+static void verity_finish_io(struct dm_verity_io *io, int error)
{
- do {
- /* Try the normal path first since if everything is ready, it
- * will be the fastest.
- */
- if (!dm_get_device(ti, devname,
- dm_table_get_mode(ti->table), dm_dev))
- return 0;
-
- /* Try the device by partition UUID */
- if (!dm_get_device_by_uuid(ti, devname, dev_start, dev_len,
- dm_dev))
- return 0;
-
- /* No need to be too aggressive since this is a slow path. */
- msleep(500);
- } while (dev_wait && (driver_probe_done() != 0 || *dm_dev == NULL));
- async_synchronize_full();
- return -1;
-}
-
-
-/*-----------------------------------------------------------------
- * Reverse flow of requests into the device.
- *
- * (Start at the bottom with verity_map and work your way upward).
- *-----------------------------------------------------------------*/
+ struct bio *bio = io->bio;
+ struct dm_verity *v = io->v;
-static void verity_inc_pending(struct dm_verity_io *io);
+ bio->bi_end_io = io->orig_bi_end_io;
+ bio->bi_private = io->orig_bi_private;
-static void verity_return_bio_to_caller(struct dm_verity_io *io)
-{
- struct verity_config *vc = io->target->private;
+ if (io->io_vec != io->io_vec_inline)
+ mempool_free(io->io_vec, v->vec_mempool);
- if (io->error)
- verity_error(vc, io, io->error);
+ mempool_free(io, v->io_mempool);
- bio_endio(io->bio, io->error);
- mempool_free(io, vc->io_pool);
+ bio_endio(bio, error);
}
-/* Check for any missing bht hashes. */
-static bool verity_is_bht_populated(struct dm_verity_io *io)
+static void verity_work(struct work_struct *w)
{
- struct verity_config *vc = io->target->private;
- u64 block;
+ struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
- for (block = io->block; block < io->block + io->count; ++block)
- if (!dm_bht_is_populated(&vc->bht, block))
- return false;
-
- return true;
+ verity_finish_io(io, verity_verify_io(io));
}
-/* verity_dec_pending manages the lifetime of all dm_verity_io structs.
- * Non-bug error handling is centralized through this interface and
- * all passage from workqueue to workqueue.
- */
-static void verity_dec_pending(struct dm_verity_io *io)
+static void verity_end_io(struct bio *bio, int error)
{
- struct verity_config *vc = io->target->private;
- VERITY_BUG_ON(!io, "NULL argument");
-
- DMDEBUG("dec pending %p: %d--", io, atomic_read(&io->pending));
-
- if (!atomic_dec_and_test(&io->pending))
- goto done;
-
- if (unlikely(io->error))
- goto io_error;
-
- /* I/Os that were pending may now be ready */
- if (verity_is_bht_populated(io)) {
- verity_stats_io_queue_dec(vc);
- verity_stats_verify_queue_inc(vc);
- INIT_DELAYED_WORK(&io->work, kverityd_verify);
- queue_delayed_work(kveritydq, &io->work, 0);
- REQTRACE("Block %llu+ is being queued for verify (io:%p)",
- ULL(io->block), io);
- } else {
- INIT_DELAYED_WORK(&io->work, kverityd_io);
- queue_delayed_work(kverityd_ioq, &io->work, HZ/10);
- verity_stats_total_requeues_inc(vc);
- REQTRACE("Block %llu+ is being requeued for io (io:%p)",
- ULL(io->block), io);
- }
+ struct dm_verity_io *io = bio->bi_private;
-done:
- return;
+ if (error) {
+ verity_finish_io(io, error);
+ return;
+ }
-io_error:
- verity_return_bio_to_caller(io);
+ INIT_WORK(&io->work, verity_work);
+ queue_work(io->v->verify_wq, &io->work);
}
-/* Walks the data set and computes the hash of the data read from the
- * untrusted source device. The computed hash is then passed to dm-bht
- * for verification.
+/*
+ * Prefetch buffers for the specified io.
+ * The root buffer is not prefetched, it is assumed that it will be cached
+ * all the time.
*/
-static int verity_verify(struct verity_config *vc,
- struct bio *bio)
+static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
{
- unsigned int idx;
- u64 block;
- int r;
-
- VERITY_BUG_ON(bio == NULL);
-
- block = to_bytes(bio->bi_sector) >> VERITY_BLOCK_SHIFT;
-
- for (idx = bio->bi_idx; idx < bio->bi_vcnt; idx++) {
- struct bio_vec *bv = bio_iovec_idx(bio, idx);
-
- VERITY_BUG_ON(bv->bv_offset % VERITY_BLOCK_SIZE);
- VERITY_BUG_ON(bv->bv_len % VERITY_BLOCK_SIZE);
-
- DMDEBUG("Updating hash for block %llu", ULL(block));
+ int i;
- /* TODO(msb) handle case where multiple blocks fit in a page */
- r = dm_bht_verify_block(&vc->bht, block,
- bv->bv_page, bv->bv_offset);
- /* dm_bht functions aren't expected to return errno friendly
- * values. They are converted here for uniformity.
- */
- if (r > 0) {
- DMERR("Pending data for block %llu seen at verify",
- ULL(block));
- r = -EBUSY;
- goto bad_state;
- }
- if (r < 0) {
- DMERR_LIMIT("Block hash does not match!");
- r = -EACCES;
- goto bad_match;
+ for (i = v->levels - 2; i >= 0; i--) {
+ sector_t hash_block_start;
+ sector_t hash_block_end;
+ verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
+ verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
+ if (!i) {
+ unsigned cluster = *(volatile unsigned *)&dm_verity_prefetch_cluster;
+
+ cluster >>= v->data_dev_block_bits;
+ if (unlikely(!cluster))
+ goto no_prefetch_cluster;
+
+ if (unlikely(cluster & (cluster - 1)))
+ cluster = 1 << (fls(cluster) - 1);
+
+ hash_block_start &= ~(sector_t)(cluster - 1);
+ hash_block_end |= cluster - 1;
+ if (unlikely(hash_block_end >= v->hash_blocks))
+ hash_block_end = v->hash_blocks - 1;
}
- REQTRACE("Block %llu verified", ULL(block));
-
- block++;
- /* After completing a block, allow a reschedule.
- * TODO(wad) determine if this is truly needed.
- */
- cond_resched();
+no_prefetch_cluster:
+ dm_bufio_prefetch(v->bufio, hash_block_start,
+ hash_block_end - hash_block_start + 1);
}
-
- return 0;
-
-bad_state:
-bad_match:
- return r;
}
-/* Services the verify workqueue */
-static void kverityd_verify(struct work_struct *work)
+/*
+ * Bio map function. It allocates dm_verity_io structure and bio vector and
+ * fills them. Then it issues prefetches and the I/O.
+ */
+static int verity_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
{
- struct delayed_work *dwork = container_of(work, struct delayed_work,
- work);
- struct dm_verity_io *io = container_of(dwork, struct dm_verity_io,
- work);
- struct verity_config *vc = io->target->private;
-
- io->error = verity_verify(vc, io->bio);
+ struct dm_verity *v = ti->private;
+ struct dm_verity_io *io;
- /* Free up the bio and tag with the return value */
- verity_stats_verify_queue_dec(vc);
- verity_return_bio_to_caller(io);
-}
+ bio->bi_bdev = v->data_dev->bdev;
+ bio->bi_sector = verity_map_sector(v, bio->bi_sector);
-/* Asynchronously called upon the completion of dm-bht I/O. The status
- * of the operation is passed back to dm-bht and the next steps are
- * decided by verity_dec_pending.
- */
-static void kverityd_io_bht_populate_end(struct bio *bio, int error)
-{
- struct dm_bht_entry *entry = (struct dm_bht_entry *) bio->bi_private;
- struct dm_verity_io *io = (struct dm_verity_io *) entry->io_context;
+ if (((unsigned)bio->bi_sector | bio_sectors(bio)) &
+ ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
+ DMERR_LIMIT("unaligned io");
+ return -EIO;
+ }
- DMDEBUG("kverityd_io_bht_populate_end (io:%p, entry:%p)", io, entry);
- /* Tell the tree to atomically update now that we've populated
- * the given entry.
- */
- dm_bht_read_completed(entry, error);
-
- /* Clean up for reuse when reading data to be checked */
- bio->bi_vcnt = 0;
- bio->bi_io_vec->bv_offset = 0;
- bio->bi_io_vec->bv_len = 0;
- bio->bi_io_vec->bv_page = NULL;
- /* Restore the private data to I/O so the destructor can be shared. */
- bio->bi_private = (void *) io;
- bio_put(bio);
-
- /* We bail but assume the tree has been marked bad. */
- if (unlikely(error)) {
- DMERR("Failed to read for sector %llu (%u)",
- ULL(io->bio->bi_sector), io->bio->bi_size);
- io->error = error;
- /* Pass through the error to verity_dec_pending below */
+ if ((bio->bi_sector + bio_sectors(bio)) >>
+ (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
+ DMERR_LIMIT("io out of range");
+ return -EIO;
}
- /* When pending = 0, it will transition to reading real data */
- verity_dec_pending(io);
-}
-/* Called by dm-bht (via dm_bht_populate), this function provides
- * the message digests to dm-bht that are stored on disk.
- */
-static int kverityd_bht_read_callback(void *ctx, sector_t start, u8 *dst,
- sector_t count,
- struct dm_bht_entry *entry)
-{
- struct dm_verity_io *io = ctx; /* I/O for this batch */
- struct verity_config *vc;
- struct bio *bio;
- /* Explicitly catches these so we can use a custom bug route */
- VERITY_BUG_ON(!io || !dst || !io->target || !io->target->private);
- VERITY_BUG_ON(!entry);
- VERITY_BUG_ON(count != to_sector(VERITY_BLOCK_SIZE));
+ if (bio_data_dir(bio) == WRITE)
+ return -EIO;
- vc = io->target->private;
+ io = mempool_alloc(v->io_mempool, GFP_NOIO);
+ io->v = v;
+ io->bio = bio;
+ io->orig_bi_end_io = bio->bi_end_io;
+ io->orig_bi_private = bio->bi_private;
+ io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
+ io->n_blocks = bio->bi_size >> v->data_dev_block_bits;
+
+ bio->bi_end_io = verity_end_io;
+ bio->bi_private = io;
+ io->io_vec_size = bio->bi_vcnt - bio->bi_idx;
+ if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
+ io->io_vec = io->io_vec_inline;
+ else
+ io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO);
+ memcpy(io->io_vec, bio_iovec(bio),
+ io->io_vec_size * sizeof(struct bio_vec));
+
+ verity_prefetch_io(v, io);
- /* The I/O context is nested inside the entry so that we don't need one
- * io context per page read.
- */
- entry->io_context = ctx;
-
- /* We should only get page size requests at present. */
- verity_inc_pending(io);
- bio = verity_alloc_bioset(vc, GFP_NOIO, 1);
- if (unlikely(!bio)) {
- DMCRIT("Out of memory at bio_alloc_bioset");
- dm_bht_read_completed(entry, -ENOMEM);
- return -ENOMEM;
- }
- bio->bi_private = (void *) entry;
- bio->bi_idx = 0;
- bio->bi_size = VERITY_BLOCK_SIZE;
- bio->bi_sector = vc->hash_start + start;
- bio->bi_bdev = vc->hash_dev->bdev;
- bio->bi_end_io = kverityd_io_bht_populate_end;
- bio->bi_rw = REQ_META;
- /* Only need to free the bio since the page is managed by bht */
- bio->bi_destructor = dm_verity_bio_destructor;
- bio->bi_vcnt = 1;
- bio->bi_io_vec->bv_offset = 0;
- bio->bi_io_vec->bv_len = to_bytes(count);
- /* dst is guaranteed to be a page_pool allocation */
- bio->bi_io_vec->bv_page = virt_to_page(dst);
- /* Track that this I/O is in use. There should be no risk of the io
- * being removed prior since this is called synchronously.
- */
- DMDEBUG("Submitting bht io %p (entry:%p)", io, entry);
- vc->stats.bht_requests++;
generic_make_request(bio);
- return 0;
-}
-/* Submits an io request for each missing block of block hashes.
- * The last one to return will then enqueue this on the io workqueue.
- */
-static void kverityd_io_bht_populate(struct dm_verity_io *io)
-{
- struct verity_config *vc = io->target->private;
- u64 block;
-
- REQTRACE("populating %llu starting at block %llu (io:%p)",
- ULL(io->count), ULL(io->block), io);
- for (block = io->block; block < io->block + io->count; ++block) {
- int populated;
-
- DMDEBUG("Calling dm_bht_populate for %ull (io:%p)",
- ULL(block), io);
- populated = dm_bht_populate(&vc->bht, io, block);
- if (populated < 0) {
- DMCRIT("dm_bht_populate error: block %llu (io:%p): %d",
- ULL(block), io, populated);
- /* TODO(wad) support propagating transient errors
- * cleanly.
- */
- /* verity_dec_pending will handle the error case. */
- io->error = -EPERM;
- break;
- }
- }
- REQTRACE("Block %llu+ initiated %d requests (io: %p)",
- ULL(io->block), atomic_read(&io->pending) - 1, io);
+ return DM_MAPIO_SUBMITTED;
}
-/* Asynchronously called upon the completion of I/O issued
- * from kverityd_src_io_read. verity_dec_pending() acts as
- * the scheduler/flow manager.
+/*
+ * Status: V (valid) or C (corruption found)
*/
-static void kverityd_src_io_read_end(struct bio *clone, int error)
+static int verity_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned maxlen)
{
- struct dm_verity_io *io = clone->bi_private;
-
- DMDEBUG("I/O completed");
- if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error))
- error = -EIO;
+ struct dm_verity *v = ti->private;
+ unsigned sz = 0;
+ unsigned x;
- if (unlikely(error)) {
- DMERR("Error occurred: %d (%llu, %u)",
- error, ULL(clone->bi_sector), clone->bi_size);
- io->error = error;
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%c", v->hash_failed ? 'C' : 'V');
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT("%u %s %s %u %u %llu %llu %s ",
+ v->version,
+ v->data_dev->name,
+ v->hash_dev->name,
+ 1 << v->data_dev_block_bits,
+ 1 << v->hash_dev_block_bits,
+ (unsigned long long)v->data_blocks,
+ (unsigned long long)v->hash_start,
+ v->alg_name
+ );
+ for (x = 0; x < v->digest_size; x++)
+ DMEMIT("%02x", v->root_digest[x]);
+ DMEMIT(" ");
+ if (!v->salt_size)
+ DMEMIT("-");
+ else
+ for (x = 0; x < v->salt_size; x++)
+ DMEMIT("%02x", v->salt[x]);
+ break;
}
- /* Release the clone which just avoids the block layer from
- * leaving offsets, etc in unexpected states.
- */
- bio_put(clone);
-
- verity_dec_pending(io);
- DMDEBUG("all data has been loaded from the data device");
+ return 0;
}
-/* If not yet underway, an I/O request will be issued to the vc->dev
- * device for the data needed. It is cloned to avoid unexpected changes
- * to the original bio struct.
- */
-static void kverityd_src_io_read(struct dm_verity_io *io)
+static int verity_ioctl(struct dm_target *ti, unsigned cmd,
+ unsigned long arg)
{
- struct verity_config *vc = io->target->private;
- struct bio *clone;
+ struct dm_verity *v = ti->private;
+ int r = 0;
- VERITY_BUG_ON(!io);
+ if (v->data_start ||
+ ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
+ r = scsi_verify_blk_ioctl(NULL, cmd);
- /* If clone is non-NULL, then the read is already issued. Could also
- * check BIO_UPTODATE, but it doesn't seem needed.
- */
- if (io->flags & VERITY_IOFLAGS_CLONED) {
- DMDEBUG("io_read called with existing bio. bailing: %p", io);
- return;
- }
- io->flags |= VERITY_IOFLAGS_CLONED;
+ return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode,
+ cmd, arg);
+}
- DMDEBUG("kverity_io_read started");
+static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+ struct bio_vec *biovec, int max_size)
+{
+ struct dm_verity *v = ti->private;
+ struct request_queue *q = bdev_get_queue(v->data_dev->bdev);
- /* Clone the bio. The block layer may modify the bvec array. */
- DMDEBUG("Creating clone of the request");
- ALLOCTRACE("clone for io %p, sector %llu",
- io, ULL(vc->start + io->sector));
- clone = verity_bio_clone(io);
- if (unlikely(!clone)) {
- io->error = -ENOMEM;
- return;
- }
+ if (!q->merge_bvec_fn)
+ return max_size;
- verity_inc_pending(io);
+ bvm->bi_bdev = v->data_dev->bdev;
+ bvm->bi_sector = verity_map_sector(v, bvm->bi_sector);
- /* Submit to the block device */
- DMDEBUG("Submitting bio");
- /* XXX: check queue_max_hw_sectors(bdev_get_queue(clone->bi_bdev)); */
- generic_make_request(clone);
+ return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
-/* kverityd_io services the I/O workqueue. For each pass through
- * the I/O workqueue, a call to populate both the origin drive
- * data and the hash tree data is made.
- */
-static void kverityd_io(struct work_struct *work)
+static int verity_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
{
- struct delayed_work *dwork = container_of(work, struct delayed_work,
- work);
- struct dm_verity_io *io = container_of(dwork, struct dm_verity_io,
- work);
- VERITY_BUG_ON(!io->bio);
-
- /* Issue requests asynchronously. */
- verity_inc_pending(io);
- kverityd_src_io_read(io);
- kverityd_io_bht_populate(io);
- verity_dec_pending(io);
-}
+ struct dm_verity *v = ti->private;
-/* Paired with verity_dec_pending, the pending value in the io dictate the
- * lifetime of a request and when it is ready to be processed on the
- * workqueues.
- */
-static void verity_inc_pending(struct dm_verity_io *io)
-{
- atomic_inc(&io->pending);
+ return fn(ti, v->data_dev, v->data_start, ti->len, data);
}
-/* Block-level requests start here. */
-static int verity_map(struct dm_target *ti, struct bio *bio,
- union map_info *map_context) {
- struct dm_verity_io *io;
- struct verity_config *vc;
- struct request_queue *r_queue;
+static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+ struct dm_verity *v = ti->private;
- if (unlikely(!ti)) {
- DMERR("dm_target was NULL");
- return -EIO;
- }
+ if (limits->logical_block_size < 1 << v->data_dev_block_bits)
+ limits->logical_block_size = 1 << v->data_dev_block_bits;
- vc = ti->private;
- r_queue = bdev_get_queue(vc->dev->bdev);
-
- /* Trace incoming bios */
- REQTRACE("Got a %s for %llu, %u bytes)",
- (bio_rw(bio) == WRITE ? "WRITE" :
- (bio_rw(bio) == READ ? "READ" : "READA")),
- ULL(bio->bi_sector), bio->bi_size);
-
- verity_stats_total_requests_inc(vc);
-
- if (bio_data_dir(bio) == WRITE) {
- /* If we silently drop writes, then the VFS layer will cache
- * the write and persist it in memory. While it doesn't change
- * the underlying storage, it still may be contrary to the
- * behavior expected by a verified, read-only device.
- */
- DMWARN_LIMIT("write request received. rejecting with -EIO.");
- verity_error(vc, NULL, -EIO);
- /* bio_endio(bio, -EIO); */
- return -EIO;
- } else {
- VERITY_BUG_ON(bio->bi_sector % to_sector(VERITY_BLOCK_SIZE));
- VERITY_BUG_ON(bio->bi_size % VERITY_BLOCK_SIZE);
-
- /* Queue up the request to be verified */
- io = verity_io_alloc(ti, bio, bio->bi_sector - ti->begin);
- if (!io) {
- DMERR_LIMIT("Failed to allocate and init IO data");
- return DM_MAPIO_REQUEUE;
- }
- verity_stats_io_queue_inc(vc);
- vc->stats.total_blocks += io->count;
- vc->stats.io_by_block_size[ilog2(io->count)]++;
-
- vc->stats.total_size += bio->bi_size;
- vc->stats.io_size_by_block_size[ilog2(io->count)] +=
- bio->bi_size;
- INIT_DELAYED_WORK(&io->work, kverityd_io);
- queue_delayed_work(kverityd_ioq, &io->work, 0);
- }
+ if (limits->physical_block_size < 1 << v->data_dev_block_bits)
+ limits->physical_block_size = 1 << v->data_dev_block_bits;
- return DM_MAPIO_SUBMITTED;
+ blk_limits_io_min(limits, limits->logical_block_size);
}
-static int verity_stats_seq_show(struct seq_file *seq, void *offset)
+static void verity_dtr(struct dm_target *ti)
{
- struct verity_config *vc = seq->private;
- struct verity_stats *stats = &vc->stats;
- unsigned long long running_total;
- int i;
+ struct dm_verity *v = ti->private;
- seq_printf(seq, "%d\tI/O queue pending\n", (int)stats->io_queue);
- seq_printf(seq, "%u\tVerify queue pending\n", stats->verify_queue);
- seq_printf(seq, "%lu\tHash block requests\n", stats->bht_requests);
- seq_printf(seq, "%llu\tTotal re-queues\n", stats->total_requeues);
- seq_printf(seq, "%llu\tTotal requests\n", stats->total_requests);
- seq_printf(seq, "%lluMB\tTotal size\n", stats->total_size >> 20);
- seq_printf(seq, "%llu\tTotal blocks\n", stats->total_blocks);
- for (running_total = i = 0; i < 30; i++) {
- if (stats->io_by_block_size[i]) {
- running_total += stats->io_size_by_block_size[i];
- seq_printf(seq, "%lu\tRequests of size %u-%u"
- " (%uKB to %uKB), %lluKB, "
- "run.tot. = %lluMB\n",
- stats->io_by_block_size[i],
- 1U << i, (2U << i) - 1,
- 1U << i << VERITY_BLOCK_SHIFT >> 10,
- ((2U << i) - 1) << VERITY_BLOCK_SHIFT >> 10,
- stats->io_size_by_block_size[i] >> 10,
- running_total >> 20);
- }
- }
+ if (v->verify_wq)
+ destroy_workqueue(v->verify_wq);
- return 0;
-}
+ if (v->vec_mempool)
+ mempool_destroy(v->vec_mempool);
-static int verity_stats_open_fs(struct inode *inode, struct file *file)
-{
- return single_open(file, verity_stats_seq_show, inode->i_private);
-}
+ if (v->io_mempool)
+ mempool_destroy(v->io_mempool);
-static const struct file_operations verity_stats_fops = {
- .owner = THIS_MODULE,
- .open = verity_stats_open_fs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+ if (v->bufio)
+ dm_bufio_client_destroy(v->bufio);
-static int verity_init_debugfs(struct verity_config *vc)
-{
- struct dentry *dir, *stats;
-
- dir = debugfs_create_dir(vc->name, debugfs_root);
- if (!dir)
- goto cant_create_dir;
- stats = debugfs_create_file("stats",
- S_IFREG | S_IRUSR | S_IRGRP | S_IROTH,
- dir, vc, &verity_stats_fops);
- if (!stats)
- goto cant_create_file;
-
- vc->debugfs_dir = dir;
- return 0;
+ kfree(v->salt);
+ kfree(v->root_digest);
-cant_create_file:
- debugfs_remove_recursive(dir);
-cant_create_dir:
- return -ENODEV;
-}
+ if (v->tfm)
+ crypto_free_shash(v->tfm);
-static void verity_cleanup_debugfs(struct verity_config *vc)
-{
- debugfs_remove_recursive(vc->debugfs_dir);
-}
+ kfree(v->alg_name);
+
+ if (v->hash_dev)
+ dm_put_device(ti, v->hash_dev);
+
+ if (v->data_dev)
+ dm_put_device(ti, v->data_dev);
-static void splitarg(char *arg, char **key, char **val) {
- *key = strsep(&arg, "=");
- *val = strsep(&arg, "");
+ kfree(v);
}
/*
- * Non-block interfaces and device-mapper specific code
- */
-
-/**
- * verity_ctr - Construct a verified mapping
- * @ti: Target being created
- * @argc: Number of elements in argv
- * @argv: Vector of key-value pairs (see below).
- *
- * Accepts the following keys:
- * @payload: hashed device
- * @hashtree: device hashtree is stored on
- * @hashstart: start address of hashes (default 0)
- * @alg: hash algorithm
- * @root_hexdigest: toplevel hash of the tree
- * @error_behavior: what to do when verification fails [optional]
- * @salt: salt, in hex [optional]
- *
- * E.g.,
- * payload=/dev/sda2 hashtree=/dev/sda3 alg=sha256
- * root_hexdigest=f08aa4a3695290c569eb1b0ac032ae1040150afb527abbeb0a3da33d82fb2c6e
- *
- * TODO(wad):
- * - Boot time addition
- * - Track block verification to free block_hashes if memory use is a concern
- * Testing needed:
- * - Regular slub_debug tracing (on checkins)
- * - Improper block hash padding
- * - Improper bundle padding
- * - Improper hash layout
- * - Missing padding at end of device
- * - Improperly sized underlying devices
- * - Out of memory conditions (make sure this isn't too flaky under high load!)
- * - Incorrect superhash
- * - Incorrect block hashes
- * - Incorrect bundle hashes
- * - Boot-up read speed; sustained read speeds
+ * Target parameters:
+ * <version> The current format is version 1.
+ * Vsn 0 is compatible with original Chromium OS releases.
+ * <data device>
+ * <hash device>
+ * <data block size>
+ * <hash block size>
+ * <the number of data blocks>
+ * <hash start block>
+ * <algorithm>
+ * <digest>
+ * <salt> Hex string or "-" if no salt.
*/
-static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
- struct verity_config *vc = NULL;
- int ret = 0;
- sector_t blocks;
- const char *payload = NULL;
- const char *hashtree = NULL;
- unsigned long hashstart = 0;
- const char *alg = NULL;
- const char *root_hexdigest = NULL;
- const char *dev_error_behavior = error_behavior;
- const char *hexsalt = NULL;
+ struct dm_verity *v;
+ unsigned num;
+ unsigned long long num_ll;
+ int r;
int i;
+ sector_t hash_position;
+ char dummy;
- if (argc >= 6 && !strchr(argv[3], '=')) {
- /* Transitional hack - support the old positional-argument format.
- * Detect it because it requires specifying an unused arg
- * (depth) which does not contain an '='. */
- unsigned long long tmpull;
- if (strcmp(argv[3], "0")) {
- ti->error = "Non-zero depth supplied";
- return -EINVAL;
- }
- if (sscanf(argv[2], "%llu", &tmpull) != 1) {
- ti->error = "Invalid hash_start supplied";
- return -EINVAL;
- }
- payload = argv[0];
- hashtree = argv[1];
- hashstart = tmpull;
- alg = argv[4];
- root_hexdigest = argv[5];
- if (argc > 6)
- dev_error_behavior = argv[6];
- } else {
- for (i = 0; i < argc; ++i) {
- char *key, *val;
- DMWARN("Argument %d: '%s'", i, argv[i]);
- splitarg(argv[i], &key, &val);
- if (!key) {
- DMWARN("Bad argument %d: missing key?", i);
- break;
- }
- if (!val) {
- DMWARN("Bad argument %d='%s': missing value", i, key);
- break;
- }
- if (!strcmp(key, "alg")) {
- alg = val;
- } else if (!strcmp(key, "payload")) {
- payload = val;
- } else if (!strcmp(key, "hashtree")) {
- hashtree = val;
- } else if (!strcmp(key, "root_hexdigest")) {
- root_hexdigest = val;
- } else if (!strcmp(key, "hashstart")) {
- if (strict_strtoul(val, 10, &hashstart)) {
- ti->error = "Invalid hashstart";
- return -EINVAL;
- }
- } else if (!strcmp(key, "error_behavior")) {
- dev_error_behavior = val;
- } else if (!strcmp(key, "salt")) {
- hexsalt = val;
- }
- }
+ v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
+ if (!v) {
+ ti->error = "Cannot allocate verity structure";
+ return -ENOMEM;
}
+ ti->private = v;
+ v->ti = ti;
-#define NEEDARG(n) \
- if (!(n)) { \
- ti->error = "Missing argument: " #n; \
- return -EINVAL; \
+ if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
+ ti->error = "Device must be readonly";
+ r = -EINVAL;
+ goto bad;
}
- NEEDARG(alg);
- NEEDARG(payload);
- NEEDARG(hashtree);
- NEEDARG(root_hexdigest);
-
-#undef NEEDARG
-
- /* The device mapper device should be setup read-only */
- if ((dm_table_get_mode(ti->table) & ~FMODE_READ) != 0) {
- ti->error = "Must be created readonly.";
- return -EINVAL;
+ if (argc != 10) {
+ ti->error = "Invalid argument count: exactly 10 arguments required";
+ r = -EINVAL;
+ goto bad;
}
- ALLOCTRACE("verity_config");
- vc = kzalloc(sizeof(*vc), GFP_KERNEL);
- if (!vc) {
- /* TODO(wad) if this is called from the setup helper, then we
- * catch these errors and do a CrOS specific thing. if not, we
- * need to have this call the error handler.
- */
- return -EINVAL;
+ if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
+ num > 1) {
+ ti->error = "Invalid version";
+ r = -EINVAL;
+ goto bad;
}
+ v->version = num;
- /* For the name, use the payload default with / changed to _ */
- vc->name = dm_disk(dm_table_get_md(ti->table))->disk_name;
-
- if (verity_init_debugfs(vc))
- goto bad_debugfs;
-
- /* Calculate the blocks from the given device size */
- vc->size = ti->len;
- blocks = to_bytes(vc->size) >> VERITY_BLOCK_SHIFT;
- if (dm_bht_create(&vc->bht, blocks, alg)) {
- DMERR("failed to create required bht");
- goto bad_bht;
- }
- if (dm_bht_set_root_hexdigest(&vc->bht, root_hexdigest)) {
- DMERR("root hexdigest error");
- goto bad_root_hexdigest;
- }
- if (hexsalt)
- dm_bht_set_salt(&vc->bht, hexsalt);
- dm_bht_set_read_cb(&vc->bht, kverityd_bht_read_callback);
-
- /* payload: device to verify */
- vc->start = 0; /* TODO: should this support a starting offset? */
- /* We only ever grab the device in read-only mode. */
- ret = verity_get_device(ti, payload, vc->start, ti->len, &vc->dev);
- if (ret) {
- DMERR("Failed to acquire device '%s': %d", payload, ret);
- ti->error = "Device lookup failed";
- goto bad_verity_dev;
+ r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
+ if (r) {
+ ti->error = "Data device lookup failed";
+ goto bad;
}
- if ((to_bytes(vc->start) % VERITY_BLOCK_SIZE) ||
- (to_bytes(vc->size) % VERITY_BLOCK_SIZE)) {
- ti->error = "Device must be VERITY_BLOCK_SIZE divisble/aligned";
- goto bad_hash_start;
+ r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
+ if (r) {
+ ti->error = "Hash device lookup failed";
+ goto bad;
}
- vc->hash_start = (sector_t)hashstart;
-
- /* hashtree: device with hashes.
- * Note, payload == hashtree is okay as long as the size of
- * ti->len passed to device mapper does not include
- * the hashes.
- */
- if (verity_get_device(ti, hashtree, vc->hash_start,
- dm_bht_sectors(&vc->bht), &vc->hash_dev)) {
- ti->error = "Hash device lookup failed";
- goto bad_hash_dev;
+ if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
+ !num || (num & (num - 1)) ||
+ num < bdev_logical_block_size(v->data_dev->bdev) ||
+ num > PAGE_SIZE) {
+ ti->error = "Invalid data device block size";
+ r = -EINVAL;
+ goto bad;
}
+ v->data_dev_block_bits = ffs(num) - 1;
- /* We leave the validity on the hash device open until the
- * next arg. Then we go ahead and try to read in all the bundle
- * hashes which live after the block hashes. If it fails, then
- * the hash offset was wrong.
- */
-
-
- /* arg4: cryptographic digest algorithm */
- if (snprintf(vc->hash_alg, CRYPTO_MAX_ALG_NAME, "%s", alg) >=
- CRYPTO_MAX_ALG_NAME) {
- ti->error = "Hash algorithm name is too long";
- goto bad_hash;
+ if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
+ !num || (num & (num - 1)) ||
+ num < bdev_logical_block_size(v->hash_dev->bdev) ||
+ num > INT_MAX) {
+ ti->error = "Invalid hash device block size";
+ r = -EINVAL;
+ goto bad;
}
+ v->hash_dev_block_bits = ffs(num) - 1;
- /* override with optional device-specific error behavior */
- vc->error_behavior = verity_parse_error_behavior(dev_error_behavior);
- if (vc->error_behavior == -1) {
- ti->error = "Bad error_behavior supplied";
- goto bad_err_behavior;
+ if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ ti->error = "Invalid data blocks";
+ r = -EINVAL;
+ goto bad;
}
+ v->data_blocks = num_ll;
- /* TODO: Maybe issues a request on the io queue for block 0? */
-
- /* Argument processing is done, setup operational data */
- /* Pool for dm_verity_io objects */
- ALLOCTRACE("slab pool for io objects");
- vc->io_pool = mempool_create_slab_pool(MIN_IOS, _verity_io_pool);
- if (!vc->io_pool) {
- ti->error = "Cannot allocate verity io mempool";
- goto bad_slab_pool;
+ if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
+ ti->error = "Data device is too small";
+ r = -EINVAL;
+ goto bad;
}
- /* Allocate the bioset used for request padding */
- /* TODO(wad) allocate a separate bioset for the first verify maybe */
- ALLOCTRACE("bioset for I/O reqs");
- vc->bs = bioset_create(MIN_BIOS, 0);
- if (!vc->bs) {
- ti->error = "Cannot allocate verity bioset";
- goto bad_bs;
+ if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
+ >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ ti->error = "Invalid hash start";
+ r = -EINVAL;
+ goto bad;
}
+ v->hash_start = num_ll;
- ti->num_flush_requests = 1;
- ti->private = vc;
-
- /* TODO(wad) add device and hash device names */
- {
- char hashdev[BDEVNAME_SIZE], vdev[BDEVNAME_SIZE];
- bdevname(vc->hash_dev->bdev, hashdev);
- bdevname(vc->dev->bdev, vdev);
- DMINFO("dev:%s hash:%s [sectors:%llu blocks:%llu]", vdev,
- hashdev, ULL(dm_bht_sectors(&vc->bht)), ULL(blocks));
+ v->alg_name = kstrdup(argv[7], GFP_KERNEL);
+ if (!v->alg_name) {
+ ti->error = "Cannot allocate algorithm name";
+ r = -ENOMEM;
+ goto bad;
}
- return 0;
-bad_bs:
- mempool_destroy(vc->io_pool);
-bad_slab_pool:
-bad_err_behavior:
-bad_hash:
- dm_put_device(ti, vc->hash_dev);
-bad_hash_dev:
-bad_hash_start:
- dm_put_device(ti, vc->dev);
-bad_bht:
-bad_root_hexdigest:
-bad_verity_dev:
- verity_cleanup_debugfs(vc);
-bad_debugfs:
- kfree(vc); /* hash is not secret so no need to zero */
- return -EINVAL;
-}
-
-static void verity_dtr(struct dm_target *ti)
-{
- struct verity_config *vc = (struct verity_config *) ti->private;
-
- DMDEBUG("Destroying bs");
- bioset_free(vc->bs);
- DMDEBUG("Destroying io_pool");
- mempool_destroy(vc->io_pool);
-
- DMDEBUG("Destroying block hash tree");
- dm_bht_destroy(&vc->bht);
-
- DMDEBUG("Putting hash_dev");
- dm_put_device(ti, vc->hash_dev);
-
- DMDEBUG("Putting dev");
- dm_put_device(ti, vc->dev);
-
- DMDEBUG("Removing debugfs dir");
- verity_cleanup_debugfs(vc);
-
- DMDEBUG("Destroying config");
- kfree(vc);
-}
-
-static int verity_status(struct dm_target *ti, status_type_t type,
- char *result, unsigned int maxlen) {
- struct verity_config *vc = (struct verity_config *) ti->private;
- unsigned int sz = 0;
- char hashdev[BDEVNAME_SIZE], vdev[BDEVNAME_SIZE];
- u8 hexdigest[VERITY_MAX_DIGEST_SIZE * 2 + 1] = { 0 };
-
- dm_bht_root_hexdigest(&vc->bht, hexdigest, sizeof(hexdigest));
+ v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
+ if (IS_ERR(v->tfm)) {
+ ti->error = "Cannot initialize hash function";
+ r = PTR_ERR(v->tfm);
+ v->tfm = NULL;
+ goto bad;
+ }
+ v->digest_size = crypto_shash_digestsize(v->tfm);
+ if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
+ ti->error = "Digest size too big";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->shash_descsize =
+ sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
+
+ v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
+ if (!v->root_digest) {
+ ti->error = "Cannot allocate root digest";
+ r = -ENOMEM;
+ goto bad;
+ }
+ if (strlen(argv[8]) != v->digest_size * 2 ||
+ hex2bin(v->root_digest, argv[8], v->digest_size)) {
+ ti->error = "Invalid root digest";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ if (strcmp(argv[9], "-")) {
+ v->salt_size = strlen(argv[9]) / 2;
+ v->salt = kmalloc(v->salt_size, GFP_KERNEL);
+ if (!v->salt) {
+ ti->error = "Cannot allocate salt";
+ r = -ENOMEM;
+ goto bad;
+ }
+ if (strlen(argv[9]) != v->salt_size * 2 ||
+ hex2bin(v->salt, argv[9], v->salt_size)) {
+ ti->error = "Invalid salt";
+ r = -EINVAL;
+ goto bad;
+ }
+ }
- switch (type) {
- case STATUSTYPE_INFO:
- DMEMIT("%u %u %u %llu %llu",
- vc->stats.io_queue,
- vc->stats.verify_queue,
- vc->stats.average_requeues,
- vc->stats.total_requeues,
- vc->stats.total_requests);
- break;
+ v->hash_per_block_bits =
+ fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1;
+
+ v->levels = 0;
+ if (v->data_blocks)
+ while (v->hash_per_block_bits * v->levels < 64 &&
+ (unsigned long long)(v->data_blocks - 1) >>
+ (v->hash_per_block_bits * v->levels))
+ v->levels++;
+
+ if (v->levels > DM_VERITY_MAX_LEVELS) {
+ ti->error = "Too many tree levels";
+ r = -E2BIG;
+ goto bad;
+ }
+
+ hash_position = v->hash_start;
+ for (i = v->levels - 1; i >= 0; i--) {
+ sector_t s;
+ v->hash_level_block[i] = hash_position;
+ s = verity_position_at_level(v, v->data_blocks, i);
+ s = (s >> v->hash_per_block_bits) +
+ !!(s & ((1 << v->hash_per_block_bits) - 1));
+ if (hash_position + s < hash_position) {
+ ti->error = "Hash device offset overflow";
+ r = -E2BIG;
+ goto bad;
+ }
+ hash_position += s;
+ }
+ v->hash_blocks = hash_position;
- case STATUSTYPE_TABLE:
- bdevname(vc->hash_dev->bdev, hashdev);
- bdevname(vc->dev->bdev, vdev);
- DMEMIT("/dev/%s /dev/%s %llu %u %s %s",
- vdev,
- hashdev,
- ULL(vc->hash_start),
- vc->bht.depth,
- vc->hash_alg,
- hexdigest);
- break;
+ v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
+ 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
+ dm_bufio_alloc_callback, NULL);
+ if (IS_ERR(v->bufio)) {
+ ti->error = "Cannot initialize dm-bufio";
+ r = PTR_ERR(v->bufio);
+ v->bufio = NULL;
+ goto bad;
}
- return 0;
-}
-static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
- struct bio_vec *biovec, int max_size)
-{
- struct verity_config *vc = ti->private;
- struct request_queue *q = bdev_get_queue(vc->dev->bdev);
+ if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
+ ti->error = "Hash device is too small";
+ r = -E2BIG;
+ goto bad;
+ }
- if (!q->merge_bvec_fn)
- return max_size;
+ v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
+ sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2);
+ if (!v->io_mempool) {
+ ti->error = "Cannot allocate io mempool";
+ r = -ENOMEM;
+ goto bad;
+ }
- bvm->bi_bdev = vc->dev->bdev;
- bvm->bi_sector = vc->start + bvm->bi_sector - ti->begin;
+ v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
+ BIO_MAX_PAGES * sizeof(struct bio_vec));
+ if (!v->vec_mempool) {
+ ti->error = "Cannot allocate vector mempool";
+ r = -ENOMEM;
+ goto bad;
+ }
- /* Optionally, this could just return 0 to stick to single pages. */
- return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
-}
+ /* WQ_UNBOUND greatly improves performance when running on ramdisk */
+ v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
+ if (!v->verify_wq) {
+ ti->error = "Cannot allocate workqueue";
+ r = -ENOMEM;
+ goto bad;
+ }
-static int verity_iterate_devices(struct dm_target *ti,
- iterate_devices_callout_fn fn, void *data)
-{
- struct verity_config *vc = ti->private;
+ return 0;
- return fn(ti, vc->dev, vc->start, ti->len, data);
-}
+bad:
+ verity_dtr(ti);
-static void verity_io_hints(struct dm_target *ti,
- struct queue_limits *limits)
-{
- limits->logical_block_size = VERITY_BLOCK_SIZE;
- limits->physical_block_size = VERITY_BLOCK_SIZE;
- blk_limits_io_min(limits, VERITY_BLOCK_SIZE);
+ return r;
}
static struct target_type verity_target = {
- .name = "verity",
- .version = {0, 1, 0},
- .module = THIS_MODULE,
- .ctr = verity_ctr,
- .dtr = verity_dtr,
- .map = verity_map,
- .merge = verity_merge,
- .status = verity_status,
+ .name = "verity",
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = verity_ctr,
+ .dtr = verity_dtr,
+ .map = verity_map,
+ .status = verity_status,
+ .ioctl = verity_ioctl,
+ .merge = verity_merge,
.iterate_devices = verity_iterate_devices,
- .io_hints = verity_io_hints,
+ .io_hints = verity_io_hints,
};
-#define VERITY_WQ_FLAGS (WQ_CPU_INTENSIVE|WQ_HIGHPRI)
-
static int __init dm_verity_init(void)
{
- int r = -ENOMEM;
-
- debugfs_root = debugfs_create_dir("dm-verity", NULL);
- if (!debugfs_root) {
- DMERR("failed to create debugfs directory");
- r = -ENODEV;
- goto bad_debugfs_dir;
- }
-
- _verity_io_pool = KMEM_CACHE(dm_verity_io, 0);
- if (!_verity_io_pool) {
- DMERR("failed to allocate pool dm_verity_io");
- goto bad_io_pool;
- }
-
- kverityd_ioq = alloc_workqueue("kverityd_io", VERITY_WQ_FLAGS, 1);
- if (!kverityd_ioq) {
- DMERR("failed to create workqueue kverityd_ioq");
- goto bad_io_queue;
- }
-
- kveritydq = alloc_workqueue("kverityd", VERITY_WQ_FLAGS, 1);
- if (!kveritydq) {
- DMERR("failed to create workqueue kveritydq");
- goto bad_verify_queue;
- }
+ int r;
r = dm_register_target(&verity_target);
- if (r < 0) {
+ if (r < 0)
DMERR("register failed %d", r);
- goto register_failed;
- }
-
- DMINFO("version %u.%u.%u loaded", verity_target.version[0],
- verity_target.version[1], verity_target.version[2]);
return r;
-
-register_failed:
- destroy_workqueue(kveritydq);
-bad_verify_queue:
- destroy_workqueue(kverityd_ioq);
-bad_io_queue:
- kmem_cache_destroy(_verity_io_pool);
-bad_io_pool:
- debugfs_remove_recursive(debugfs_root);
-bad_debugfs_dir:
- return r;
}
static void __exit dm_verity_exit(void)
{
- destroy_workqueue(kveritydq);
- destroy_workqueue(kverityd_ioq);
-
dm_unregister_target(&verity_target);
- kmem_cache_destroy(_verity_io_pool);
- debugfs_remove_recursive(debugfs_root);
}
module_init(dm_verity_init);
module_exit(dm_verity_exit);
-MODULE_AUTHOR("The Chromium OS Authors <chromium-os-dev@chromium.org>");
+MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
+MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
+MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
MODULE_LICENSE("GPL");