
[PATCH] byteswap target for device-mapper

 linux-2.6.4-rc1-root/drivers/md/Kconfig       |   15 
 linux-2.6.4-rc1-root/drivers/md/Makefile      |    1 
 linux-2.6.4-rc1-root/drivers/md/dm-byteswap.c |  634 ++++++++++++++++++++++++++
 3 files changed, 650 insertions(+)

diff -puN /dev/null drivers/md/dm-byteswap.c
--- /dev/null	2004-01-17 00:25:55.000000000 +0100
+++ linux-2.6.4-rc1-root/drivers/md/dm-byteswap.c	2004-02-29 22:45:07.520474680 +0100
@@ -0,0 +1,634 @@
+/*
+ * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
+ * Copyright (C) 2004 Bartlomiej Zolnierkiewicz
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/highmem.h>
+#include <asm/scatterlist.h>
+#include <asm/hardirq.h>
+#include <asm/kmap_types.h>
+
+#include "dm.h"
+
+/*
+ * per bio private data, allocated from byteswap_config.io_pool
+ */
+struct byteswap_io {
+	struct dm_target *target;	/* target the original bio was mapped to */
+	struct bio *bio;		/* original bio handed to byteswap_map() */
+	struct bio *first_clone;	/* first clone; extra reference dropped in dec_pending() */
+	struct work_struct work;	/* queued on kbyteswapd for completed reads */
+	atomic_t pending;		/* one per submitted clone plus one held by byteswap_map() */
+	int error;			/* last negative error passed to dec_pending() */
+};
+
+/*
+ * context holding the current state of a multi-part conversion
+ */
+struct convert_context {
+	struct bio *bio_in;		/* bio the data is read from */
+	struct bio *bio_out;		/* bio the swapped data is written to */
+	unsigned int offset_in;		/* byte offset inside the current input bio_vec */
+	unsigned int offset_out;	/* byte offset inside the current output bio_vec */
+	int idx_in;			/* index of the current input bio_vec */
+	int idx_out;			/* index of the current output bio_vec */
+	sector_t sector;		/* advanced by one per converted sector */
+	int write;			/* 1 when set up for a WRITE bio, 0 by the read worker */
+};
+
+
+struct byteswap_config {
+	struct dm_dev *dev;	/* underlying device obtained via dm_get_device() */
+	sector_t start;		/* start sector on dev, parsed from the table line */
+
+	/*
+	 * pool for per bio private data and
+	 * for byteswapping buffer pages
+	 */
+	mempool_t *io_pool;
+	mempool_t *page_pool;
+};
+
+#define MIN_IOS        256	/* min_nr given to mempool_create() for io_pool */
+#define MIN_POOL_PAGES 32	/* min_nr given to mempool_create() for page_pool */
+#define MIN_BIO_PAGES  8	/* pages allocated per bio before __GFP_WAIT is dropped */
+
+static kmem_cache_t *_byteswap_io_pool;	/* slab cache of struct byteswap_io, backs io_pool */
+
+/*
+ * Mempool alloc and free callbacks for page_pool: one page per element
+ */
+static void *mempool_alloc_page(int gfp_mask, void *data)
+{
+	return alloc_page(gfp_mask);	/* the pool's data cookie is not used */
+}
+
+static void mempool_free_page(void *page, void *data)
+{
+	__free_page(page);	/* counterpart of mempool_alloc_page(); data is not used */
+}
+
+/* stolen from crypto/internal.h; indexed by (in_softirq() ? 2 : 0) + out */
+static const enum km_type byteswap_km_types[] = {
+	KM_USER0,	/* not in softirq, out == 0 */
+	KM_USER1,	/* not in softirq, out == 1 */
+	KM_SOFTIRQ0,	/* in softirq, out == 0 */
+	KM_SOFTIRQ1,	/* in softirq, out == 1 */
+};
+
+static inline enum km_type byteswap_kmap_type(int out)
+{
+	return byteswap_km_types[(in_softirq() ? 2 : 0) + out];	/* callers pass 0 for the source, 1 for the destination */
+}
+
+/*
+ * Swap the two bytes of every 16-bit word of one sector from src into dst.
+ * nbytes must equal 1 << SECTOR_SHIFT; anything else triggers BUG_ON().
+ */
+static int byteswap(struct scatterlist *dst, struct scatterlist *src,
+		    unsigned int nbytes)
+{
+	void *src_p, *dst_p;
+	u16 *in, *out;
+	unsigned int words = nbytes / 2;
+
+	BUG_ON(nbytes != (1 << SECTOR_SHIFT));
+
+	/* source and destination are mapped through separate kmap slots */
+	src_p = kmap_atomic(src->page, byteswap_kmap_type(0)) + src->offset;
+	dst_p = kmap_atomic(dst->page, byteswap_kmap_type(1)) + dst->offset;
+
+	in = src_p;
+	out = dst_p;
+	while (words--) {
+		*out = *in << 8 | *in >> 8;
+		in++;
+		out++;
+	}
+	kunmap_atomic(src_p, byteswap_kmap_type(0));
+	kunmap_atomic(dst_p, byteswap_kmap_type(1));
+	flush_dcache_page(dst->page);
+
+	return 0;
+}
+
+static inline int
+byteswap_convert_scatterlist(struct byteswap_config *bc, struct scatterlist *out,
+			     struct scatterlist *in, unsigned int length,
+			     int write, sector_t sector)
+{
+	return byteswap(out, in, length);	/* bc, write and sector are not used */
+}
+
+static void
+byteswap_convert_init(struct byteswap_config *bc, struct convert_context *ctx,
+		      struct bio *bio_out, struct bio *bio_in,
+		      sector_t sector, int write)
+{
+	ctx->write = write;
+	ctx->sector = sector;
+	ctx->bio_in = bio_in;
+	ctx->idx_in = bio_in ? bio_in->bi_idx : 0;	/* start at the bio's current vector */
+	ctx->offset_in = 0;
+	ctx->bio_out = bio_out;		/* may be NULL; byteswap_clone() sets it later */
+	ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
+	ctx->offset_out = 0;
+}
+
+/*
+ * Byteswap ctx->bio_in into ctx->bio_out (can be the same one), by sector.
+ */
+static int byteswap_convert(struct byteswap_config *bc,
+			    struct convert_context *ctx)
+{
+	int r = 0;
+
+	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
+	      ctx->idx_out < ctx->bio_out->bi_vcnt) {	/* until either bio runs out of vectors */
+		struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
+		struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
+		struct scatterlist sg_in = {
+			.page = bv_in->bv_page,
+			.offset = bv_in->bv_offset + ctx->offset_in,
+			.length = 1 << SECTOR_SHIFT
+		};
+		struct scatterlist sg_out = {
+			.page = bv_out->bv_page,
+			.offset = bv_out->bv_offset + ctx->offset_out,
+			.length = 1 << SECTOR_SHIFT
+		};
+
+		ctx->offset_in += sg_in.length;	/* advance the input cursor by one sector */
+		if (ctx->offset_in >= bv_in->bv_len) {
+			ctx->offset_in = 0;
+			ctx->idx_in++;
+		}
+
+		ctx->offset_out += sg_out.length;	/* advance the output cursor likewise */
+		if (ctx->offset_out >= bv_out->bv_len) {
+			ctx->offset_out = 0;
+			ctx->idx_out++;
+		}
+
+		r = byteswap_convert_scatterlist(bc, &sg_out, &sg_in, sg_in.length,
+						 ctx->write, ctx->sector);
+		if (r < 0)	/* stop at the first failing sector */
+			break;
+
+		ctx->sector++;
+	}
+
+	return r;	/* 0, or the negative result of the failed conversion */
+}
+
+/*
+ * Generate a new unfragmented bio with the given size, backed by pages from
+ * page_pool.  This should never violate the device limitations.  May return
+ * a smaller bio when running out of pages, or NULL if nothing was allocated.
+ */
+static struct bio *
+byteswap_alloc_buffer(mempool_t *page_pool, unsigned int size,
+		      struct bio *base_bio, int *bio_vec_idx)
+{
+	struct bio *bio;
+	int nr_iovecs = dm_div_up(size, PAGE_SIZE);
+	int gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
+	int flags = current->flags;	/* saved so PF_MEMALLOC can be restored below */
+	int i;
+
+	/*
+	 * Tell VM to act less aggressively and fail earlier.
+	 * This is not necessary but increases throughput.
+	 * FIXME: Is this really intelligent?
+	 */
+	current->flags &= ~PF_MEMALLOC;
+
+	if (base_bio)
+		bio = bio_clone(base_bio, GFP_NOIO);
+	else
+		bio = bio_alloc(GFP_NOIO, nr_iovecs);
+	if (!bio) {
+		if (flags & PF_MEMALLOC)
+			current->flags |= PF_MEMALLOC;
+		return NULL;
+	}
+
+	/* if the last bio was not complete, continue where that one ended */
+	bio->bi_idx = *bio_vec_idx;
+	bio->bi_vcnt = *bio_vec_idx;
+	bio->bi_size = 0;
+	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+
+	/* bio->bi_idx pages have already been allocated */
+	size -= bio->bi_idx * PAGE_SIZE;
+
+	for(i = bio->bi_idx; i < nr_iovecs; i++) {
+		struct bio_vec *bv = bio_iovec_idx(bio, i);
+
+		bv->bv_page = mempool_alloc(page_pool, gfp_mask);
+		if (!bv->bv_page)	/* out of pages: return what was allocated so far */
+			break;
+
+		/*
+		 * if additional pages cannot be allocated without waiting,
+		 * return a partially allocated bio, the caller will then try
+		 * to allocate additional bios while submitting this partial bio
+		 */
+		if ((i - bio->bi_idx) == (MIN_BIO_PAGES - 1))
+			gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+
+		bv->bv_offset = 0;
+		if (size > PAGE_SIZE)
+			bv->bv_len = PAGE_SIZE;
+		else
+			bv->bv_len = size;	/* last, possibly partial, page */
+
+		bio->bi_size += bv->bv_len;
+		bio->bi_vcnt++;
+		size -= bv->bv_len;
+	}
+
+	if (flags & PF_MEMALLOC)
+		current->flags |= PF_MEMALLOC;
+
+	if (!bio->bi_size) {	/* not a single page could be allocated */
+		bio_put(bio);
+		return NULL;
+	}
+
+	/*
+	 * Remember the last bio_vec allocated to be able
+	 * to correctly continue after the splitting.
+	 */
+	*bio_vec_idx = bio->bi_vcnt;
+
+	return bio;
+}
+
+static void byteswap_free_buffer_pages(mempool_t *page_pool,
+				       struct bio *bio, unsigned int bytes)
+{
+	unsigned int start, end;
+	struct bio_vec *bv;
+	int i;
+
+	/*
+	 * This is ugly, but Jens Axboe thinks that using bi_idx in the
+	 * endio function is too dangerous at the moment, so I calculate the
+	 * correct position using bi_vcnt and bi_size.
+	 * The bv_offset and bv_len fields might already be modified but we
+	 * know that we always allocated whole pages.
+	 * A fix to the bi_idx issue in the kernel is in the works, so
+	 * we will hopefully be able to revert to the cleaner solution soon.
+	 */
+	i = bio->bi_vcnt - 1;
+	bv = bio_iovec_idx(bio, i);
+	end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - bio->bi_size;
+	start = end - bytes;	/* the range just completed is the last 'bytes' before end */
+
+	start >>= PAGE_SHIFT;	/* byte position -> bio_vec index */
+	if (!bio->bi_size)	/* nothing left to complete: free up to the last vector */
+		end = bio->bi_vcnt;
+	else
+		end >>= PAGE_SHIFT;
+
+	for(i = start; i < end; i++) {
+		bv = bio_iovec_idx(bio, i);
+		BUG_ON(!bv->bv_page);
+		mempool_free(bv->bv_page, page_pool);
+		bv->bv_page = NULL;	/* so a repeated free trips the BUG_ON above */
+	}
+}
+
+/*
+ * One of the bios was finished. Check for completion of
+ * the whole request and correctly clean up the buffer.
+ */
+static void dec_pending(struct byteswap_io *io, int error)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)io->target->private;
+
+	if (error < 0)	/* remember the failure for the final bio_endio() */
+		io->error = error;
+
+	if (!atomic_dec_and_test(&io->pending))
+		return;	/* other references are still outstanding */
+
+	if (io->first_clone)
+		bio_put(io->first_clone);	/* reference taken in byteswap_map() */
+
+	bio_endio(io->bio, io->bio->bi_size, io->error);	/* complete the original bio */
+
+	mempool_free(io, bc->io_pool);
+}
+
+/*
+ * kbyteswapd:
+ *
+ * Needed because it would be very unwise to do byteswapping in an
+ * interrupt context, so bios returning from read requests get
+ * queued here.
+ */
+static struct workqueue_struct *_kbyteswapd_workqueue;
+
+static void kbyteswapd_do_work(void *data)
+{
+	struct byteswap_io *io = (struct byteswap_io *)data;
+	struct byteswap_config *bc = (struct byteswap_config *)io->target->private;
+	struct convert_context ctx;
+	int r;
+
+	byteswap_convert_init(bc, &ctx, io->bio, io->bio,
+			      io->bio->bi_sector - io->target->begin, 0);	/* in place: same bio in and out */
+	r = byteswap_convert(bc, &ctx);
+
+	dec_pending(io, r);	/* drops the reference of the completed read clone */
+}
+
+static void kbyteswapd_queue_io(struct byteswap_io *io)
+{
+	INIT_WORK(&io->work, kbyteswapd_do_work, io);	/* io itself is the work argument */
+	queue_work(_kbyteswapd_workqueue, &io->work);
+}
+
+/* Constructor for "<dev_path> <start>"; on failure sets ti->error and frees everything */
+static int byteswap_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct byteswap_config *bc;
+
+	if (argc != 2) {
+		ti->error = "dm-byteswap: Not enough arguments";
+		return -EINVAL;
+	}
+
+	bc = kmalloc(sizeof(*bc), GFP_KERNEL);
+	if (!bc) {
+		ti->error = "dm-byteswap: Cannot allocate byteswap context";
+		return -ENOMEM;
+	}
+
+	bc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+				     mempool_free_slab, _byteswap_io_pool);
+	if (!bc->io_pool) {
+		ti->error = "dm-byteswap: Cannot allocate byteswap io mempool";
+		goto bad1;
+	}
+
+	bc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_page,
+				       mempool_free_page, NULL);
+	if (!bc->page_pool) {
+		ti->error = "dm-byteswap: Cannot allocate page mempool";
+		goto bad2;
+	}
+
+	if (sscanf(argv[1], SECTOR_FORMAT, &bc->start) != 1) {
+		ti->error = "dm-byteswap: Invalid device sector";
+		goto bad3;
+	}
+
+	if (dm_get_device(ti, argv[0], bc->start, ti->len,
+			  dm_table_get_mode(ti->table), &bc->dev)) {
+		ti->error = "dm-byteswap: Device lookup failed";
+		goto bad3;
+	}
+
+	ti->private = bc;
+	return 0;
+
+	/* error unwinding, in reverse order of acquisition */
+bad3:
+	mempool_destroy(bc->page_pool);
+bad2:
+	mempool_destroy(bc->io_pool);
+bad1:
+	kfree(bc);	/* the context itself must not be leaked either */
+	return -EINVAL;
+}
+
+static void byteswap_dtr(struct dm_target *ti)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)ti->private;
+
+	mempool_destroy(bc->page_pool);	/* both pools were created in byteswap_ctr() */
+	mempool_destroy(bc->io_pool);
+
+	dm_put_device(ti, bc->dev);	/* pairs with dm_get_device() in byteswap_ctr() */
+	kfree(bc);
+}
+
+/* bi_end_io handler installed on every clone by byteswap_clone(). */
+static int byteswap_endio(struct bio *bio, unsigned int done, int error)
+{
+	struct byteswap_io *io = (struct byteswap_io *)bio->bi_private;
+	struct byteswap_config *bc = (struct byteswap_config *)io->target->private;
+	int read_ok;
+
+	/* free the processed pages, even if it's only a partially completed write */
+	if (bio_data_dir(bio) == WRITE)
+		byteswap_free_buffer_pages(bc->page_pool, bio, done);
+
+	if (bio->bi_size)
+		return 1;
+
+	/*
+	 * Sample the clone's state before bio_put(): the put may drop the
+	 * last reference, after which the clone must not be touched.
+	 */
+	read_ok = (bio_data_dir(bio) == READ) && bio_flagged(bio, BIO_UPTODATE);
+	bio_put(bio);
+
+	/* successful reads are byteswapped by the worker thread */
+	if (read_ok) {
+		kbyteswapd_queue_io(io);
+		return 0;
+	}
+
+	dec_pending(io, error);
+	return error;
+}
+
+static inline struct bio *
+byteswap_clone(struct byteswap_config *bc, struct byteswap_io *io,
+	       struct bio *bio, sector_t sector, int *bvec_idx,
+	       struct convert_context *ctx)
+{
+	struct bio *clone;
+
+	if (bio_data_dir(bio) == WRITE) {
+		clone = byteswap_alloc_buffer(bc->page_pool, bio->bi_size,
+					      io->first_clone, bvec_idx);	/* may cover only part of bio */
+		if (clone) {
+			ctx->bio_out = clone;	/* swap from the original bio into the new buffer */
+			if (byteswap_convert(bc, ctx) < 0) {
+				byteswap_free_buffer_pages(bc->page_pool, clone,
+							   clone->bi_size);
+				bio_put(clone);
+				return NULL;
+			}
+		}
+	} else
+		clone = bio_clone(bio, GFP_NOIO);	/* reads are swapped in place after completion */
+
+	if (!clone)
+		return NULL;
+
+	clone->bi_private = io;
+	clone->bi_end_io = byteswap_endio;
+	clone->bi_bdev = bc->dev->bdev;	/* redirect to the underlying device */
+	clone->bi_sector = bc->start + sector;
+	clone->bi_rw = bio->bi_rw;
+
+	return clone;
+}
+
+static int byteswap_map(struct dm_target *ti, struct bio *bio)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)ti->private;
+	struct byteswap_io *io = mempool_alloc(bc->io_pool, GFP_NOIO);	/* NOTE(review): used unchecked - confirm this cannot return NULL */
+	struct convert_context ctx;	/* only initialised, and only used, for WRITE bios */
+	struct bio *clone;
+	unsigned int remaining = bio->bi_size;	/* bytes not yet covered by a clone */
+	sector_t sector = bio->bi_sector - ti->begin;	/* relative to the start of the target */
+	int bvec_idx = 0;	/* next bio_vec to fill, carried across partial write clones */
+
+	io->target = ti;
+	io->bio = bio;
+	io->first_clone = NULL;
+	io->error = 0;
+	atomic_set(&io->pending, 1); /* hold a reference */
+
+	if (bio_data_dir(bio) == WRITE)
+		byteswap_convert_init(bc, &ctx, NULL, bio, sector, 1);
+
+	/*
+	 * The allocated buffers can be smaller than the whole bio,
+	 * so repeat the whole process until all the data can be handled.
+	 */
+	while (remaining) {
+		clone = byteswap_clone(bc, io, bio, sector, &bvec_idx, &ctx);
+		if (!clone)
+			goto cleanup;
+
+		if (!io->first_clone) {
+			/*
+			 * hold a reference to the first clone, because it
+			 * holds the bio_vec array and that can't be freed
+			 * before all other clones are released
+			 */
+			bio_get(clone);
+			io->first_clone = clone;
+		}
+		atomic_inc(&io->pending);	/* dropped again via byteswap_endio() or the worker */
+
+		remaining -= clone->bi_size;
+		sector += bio_sectors(clone);
+
+		generic_make_request(clone);
+
+		/* out of memory -> run queues */
+		if (remaining)
+			blk_run_queues();
+	}
+
+	/* drop reference, clones could have returned before we reach this */
+	dec_pending(io, 0);
+	return 0;
+
+cleanup:
+	if (io->first_clone) {
+		dec_pending(io, -ENOMEM);	/* clones in flight: the error reaches bio_endio() later */
+		return 0;
+	}
+
+	/* if no bio has been dispatched yet, we can directly return the error */
+	mempool_free(io, bc->io_pool);
+	return -ENOMEM;
+}
+
+static int byteswap_status(struct dm_target *ti, status_type_t type,
+			   char *result, unsigned int maxlen)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)ti->private;
+	char devname[32];
+
+	if (type == STATUSTYPE_INFO) {
+		/* nothing to report: empty string */
+		result[0] = '\0';
+	} else if (type == STATUSTYPE_TABLE) {
+		/* table line: formatted device number, then start sector */
+		format_dev_t(devname, bc->dev->bdev->bd_dev);
+		snprintf(result, maxlen, "%s " SECTOR_FORMAT, devname, bc->start);
+	}
+
+	/* any other status type leaves result untouched */
+	return 0;
+}
+
+static struct target_type byteswap_target = {
+	.name   = "byteswap",	/* target name registered with device-mapper */
+	.module = THIS_MODULE,
+	.ctr    = byteswap_ctr,	/* parses "<dev_path> <start>" */
+	.dtr    = byteswap_dtr,
+	.map    = byteswap_map,
+	.status = byteswap_status,
+};
+
+static int __init dm_byteswap_init(void)
+{
+	int r;
+
+	_byteswap_io_pool = kmem_cache_create("dm-byteswap_io",
+					      sizeof(struct byteswap_io),
+					      0, 0, NULL, NULL);	/* slab cache behind every target's io_pool */
+	if (!_byteswap_io_pool)
+		return -ENOMEM;
+
+	_kbyteswapd_workqueue = create_workqueue("kbyteswapd");	/* swaps completed reads */
+	if (!_kbyteswapd_workqueue) {
+		r = -ENOMEM;
+		DMERR("couldn't create kbyteswapd");
+		goto bad1;
+	}
+
+	r = dm_register_target(&byteswap_target);
+	if (r < 0) {
+		DMERR("byteswap: register failed %d", r);
+		goto bad2;
+	}
+
+	return 0;
+
+bad2:	/* undo in reverse order of setup */
+	destroy_workqueue(_kbyteswapd_workqueue);
+bad1:
+	kmem_cache_destroy(_byteswap_io_pool);
+	return r;
+}
+
+static void __exit dm_byteswap_exit(void)
+{
+	int r = dm_unregister_target(&byteswap_target);
+
+	if (r < 0)	/* only logged; the teardown below still runs */
+		DMERR("byteswap: unregister failed %d", r);
+
+	destroy_workqueue(_kbyteswapd_workqueue);
+	kmem_cache_destroy(_byteswap_io_pool);
+}
+
+module_init(dm_byteswap_init);	/* creates the slab cache and workqueue, registers the target */
+module_exit(dm_byteswap_exit);	/* unregisters the target and destroys both again */
+
+MODULE_AUTHOR("Christophe Saout, Bartlomiej Zolnierkiewicz");
+MODULE_DESCRIPTION(DM_NAME " target for transparent byteswapping");
+MODULE_LICENSE("GPL");
diff -puN drivers/md/Kconfig~dm-byteswap drivers/md/Kconfig
--- linux-2.6.4-rc1/drivers/md/Kconfig~dm-byteswap	2004-02-29 16:42:42.000000000 +0100
+++ linux-2.6.4-rc1-root/drivers/md/Kconfig	2004-02-29 23:00:32.295887448 +0100
@@ -188,5 +188,20 @@ config DM_CRYPT
 
 	  If unsure, say N.
 
+config DM_BYTESWAP
+	tristate "Byteswap target support"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	help
+	  This device-mapper target allows you to create a device that
+	  transparently byteswaps the data on it.  It is useful for
+	  accessing non-native IDE disks on machines with a byteswapped IDE
+	  bus (Atari/Q40/Q60/TiVo) and for accessing IDE disks with
+	  byteswapped content on machines with a normal IDE bus.
+
+	  To compile this code as a module, choose M here: the module will
+	  be called dm-byteswap.
+
+	  If unsure, say N.
+
 endmenu
 
diff -puN drivers/md/Makefile~dm-byteswap drivers/md/Makefile
--- linux-2.6.4-rc1/drivers/md/Makefile~dm-byteswap	2004-02-29 16:42:45.000000000 +0100
+++ linux-2.6.4-rc1-root/drivers/md/Makefile	2004-02-29 16:48:35.000000000 +0100
@@ -24,6 +24,7 @@ obj-$(CONFIG_MD_MULTIPATH)	+= multipath.
 obj-$(CONFIG_BLK_DEV_MD)	+= md.o
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
 obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
+obj-$(CONFIG_DM_BYTESWAP)	+= dm-byteswap.o
 
 quiet_cmd_unroll = UNROLL  $@
       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \

_
