--- linux-2.3.48-pre2/Documentation/kiobuf.sample.c.~1~	Fri Feb 25 15:17:17 2000
+++ linux-2.3.48-pre2/Documentation/kiobuf.sample.c	Fri Feb 25 16:26:01 2000
@@ -0,0 +1,216 @@
+/*
+ * Example code for using kiobufs within a device driver for memory
+ * mapping of kernel memory into user space.
+ *
+ * This module creates a device driver which allocates memory in the
+ * kernel from arbitrary pages via vmalloc(), and then uses kiobufs to
+ * map those into user space. 
+ *
+ * This module registers a misc char device driver called "kiomap".  Its
+ * major number will be 10; the minor number is assigned dynamically
+ * and can be found by looking at /proc/misc (the usual minor number
+ * will be 63 unless other misc devices have already been registered).
+ * 
+ * The device driver here can be opened, but read and write functions
+ * are not declared.  However, the file descriptor to the driver can be
+ * mmap()ed, and the driver will use the kiobuf mmap routines to map
+ * an area of kernel memory into a process's address space.
+ *
+ * Any number of processes may map the same memory at once, and it will
+ * act as shared memory.  The kiobuf_vmap code will track the number of
+ * references to the memory, and the driver's module reference count is
+ * adjusted to make sure that the driver can be unloaded only when there
+ * are no users active.
+ *
+ * Written by Stephen C. Tweedie, 2000
+ * (C) Red Hat, Inc. 2000 
+ */
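+
+/*
+ * A rough user-space sketch (not part of this module), assuming a
+ * device node /dev/kiomap has been created with major 10 and the minor
+ * reported in /proc/misc:
+ *
+ *	int fd = open("/dev/kiomap", O_RDWR);
+ *	char *p = mmap(NULL, 100 * getpagesize(),
+ *		       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ *
+ * Writes through p are then visible to every other process mapping the
+ * device, since they all share the same vmalloc()ed pages.
+ */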
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/iobuf.h>
+#include <asm/semaphore.h>
+
+#if 1
+#define dprintk(x...)
+#else
+#define dprintk printk
+#endif
+
+static int kiomap_mmap(struct file * file, struct vm_area_struct * vma);
+static int kiomap_open(struct inode * inode, struct file * file);
+static int kiomap_release(struct inode * inode, struct file * file);
+
+
+static struct file_operations kiomap_fops = {
+	mmap:	kiomap_mmap,
+	open:	kiomap_open,
+	release:kiomap_release,
+};
+
+static struct miscdevice kiomap_device = {
+	minor:	MISC_DYNAMIC_MINOR,
+	name:	"kiomap",
+	fops:	&kiomap_fops
+};
+
+
+static void * local_data_area;
+#define DATA_SIZE (100 * PAGE_SIZE)
+
+static struct kiobuf local_kiobuf;
+static struct kiobuf_vmap local_vmap;
+
+
+/* A pointer to the kiomap_finished function will be stored in the
+ * kiobuf_vmap we use for mmap()ing, and this function will be called
+ * once there are no more users of the kvmap.  */
+
+static void kiomap_finished(struct kiobuf_vmap *vmap)
+{
+	MOD_DEC_USE_COUNT;
+	dprintk(KERN_INFO __FUNCTION__ ": decremented module count\n");
+}
+
+
+/* The initialisation function here creates three data structures.
+ * First of all it uses vmalloc() to reserve an area of memory.
+ * Secondly, it initialises a kiobuf into which that vmalloced memory is
+ * mapped.  Finally, it initialises a kiobuf_vmap which can be used to
+ * mmap that kiobuf into user space. */
+
+void __init create_local_heap(void)
+{
+	int err;
+	
+	/* Get our kernel memory for the data heap first */
+
+	local_data_area = vmalloc(DATA_SIZE);
+	if (!local_data_area)
+		return;
+
+	/* Now initialise a kiobuf and kiobuf_vmap structure */
+
+	kiobuf_init(&local_kiobuf);
+	kvmap_init(&local_vmap, &local_kiobuf);
+	local_vmap.kiobuf = &local_kiobuf;
+	local_vmap.deref_callback = kiomap_finished;
+	
+	/* map_kernel_kiobuf will find all of the physical pages
+	 * referred to by the vmalloc()ed virtual memory area, and will
+	 * map those physical pages into the kiobuf we have just
+	 * prepared. */
+
+	err = map_kernel_kiobuf(&local_kiobuf, 
+				(unsigned long) local_data_area, 
+				DATA_SIZE);
+	if (err) {
+		vfree(local_data_area);
+		local_data_area = 0;
+		return;
+	}
+
+	/* Initialise the vmalloced area --- we don't want users mapping
+	 * this memory and peeking into stale kernel data! */
+
+	memset(local_data_area, 0xff, DATA_SIZE);
+}
+
+
+/* All we have to do on device open/close is to maintain the module
+ * reference counts. */
+
+static int kiomap_open(struct inode * inode, struct file * file)
+{
+	MOD_INC_USE_COUNT;
+	dprintk(KERN_INFO __FUNCTION__ ": incremented module count\n");
+	return 0;
+}
+
+static int kiomap_release(struct inode * inode, struct file * file)
+{
+	MOD_DEC_USE_COUNT;
+	dprintk(KERN_INFO __FUNCTION__ ": decremented module count\n");
+	return 0;
+}
+
+/* This is our example device's mmap function, as declared to the rest
+ * of the VM.  All we have to do in our case is call mmap_kiobuf()
+ * supplying the pre-initialised kiobuf_vmap struct that we created in
+ * create_local_heap() above.
+ * 
+ * We have to be careful to take the vmap semaphore here before calling
+ * mmap_kiobuf --- that's something all callers of that function will
+ * always have to do to be safe on SMP systems.  While we hold that
+ * semaphore we can change the module reference count safely, since the
+ * same semaphore will protect the call to MOD_DEC_USE_COUNT in the
+ * kvmap close function above.
+ */
+
+static int kiomap_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	int err;
+
+	dprintk (KERN_INFO __FUNCTION__ ": begin(file %p, vma %p)\n", 
+		 file, vma);
+
+	/* A quick check to make sure we were initialised properly... */
+
+	if (!local_data_area)
+		return -ENOMEM;
+
+	/* Now we can take the kvmap semaphore and perform the mmap. */
+
+	down(&local_vmap.sem);
+	dprintk (KERN_INFO __FUNCTION__ ": Attempting mmap.\n");
+	err = mmap_kiobuf(&local_vmap, vma);
+	dprintk (KERN_INFO __FUNCTION__ ": mmap_kiobuf returned %d\n", err);
+
+	/* A return value of one means the kvmap was not in use when we
+	 * called mmap_kiobuf() */
+
+	if (err == 1) {
+		MOD_INC_USE_COUNT;
+		dprintk(KERN_INFO __FUNCTION__ ": incremented module count\n");
+	}
+	up(&local_vmap.sem);
+
+	/* and a negative return value means an error. */
+
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+
+/* The module initialisation and cleanup functions just create and
+ * destroy our local kvmap data structures, and register and deregister
+ * the testing character device driver. */
+
+int kiomap_init_module(void)
+{
+	int err;
+	
+	create_local_heap();
+	if (!local_data_area)
+		return -ENOMEM;
+	
+	err = misc_register(&kiomap_device);
+	return err;
+}
+
+int kiomap_destroy_module(void)
+{
+	int err;
+
+	vfree(local_data_area);
+	err = misc_deregister(&kiomap_device);
+	return err;
+}
+
+module_init(kiomap_init_module);
+module_exit(kiomap_destroy_module);
+
+MODULE_DESCRIPTION("kiobuf vmap test driver");
--- linux-2.3.48-pre2/drivers/char/raw.c.~1~	Fri Feb 25 10:08:48 2000
+++ linux-2.3.48-pre2/drivers/char/raw.c	Fri Feb 25 15:18:58 2000
@@ -197,14 +197,17 @@
 			raw_device_bindings[minor] = 
 				bdget(kdev_t_to_nr(MKDEV(rq.block_major, rq.block_minor)));
 		} else {
+			struct block_device *bdev;
 			kdev_t dev;
-			if (!raw_device_bindings[minor]) {
-				err = -ENODEV;
-				break;
+
+			bdev = raw_device_bindings[minor];
+			if (bdev) {
+				dev = to_kdev_t(bdev->bd_dev);
+				rq.block_major = MAJOR(dev);
+				rq.block_minor = MINOR(dev);
+			} else {
+				rq.block_major = rq.block_minor = 0;
 			}
-			dev = to_kdev_t(raw_device_bindings[minor]->bd_dev);
-			rq.block_major = MAJOR(dev);
-			rq.block_minor = MINOR(dev);
 			err = copy_to_user((void *) arg, &rq, sizeof(rq));
 		}
 		break;
@@ -304,7 +307,12 @@
 		err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
 		if (err)
 			break;
-		
+#if 0
+		err = lock_kiovec(1, &iobuf, 1);
+		if (err) 
+			break;
+#endif
+	
 		for (i=0; i < blocks; i++) 
 			b[i] = blocknr++;
 		
@@ -316,7 +324,7 @@
 			buf += err;
 		}
 
-		unmap_kiobuf(iobuf);
+		unmap_kiobuf(iobuf); /* The unlock_kiovec is implicit here */
 
 		if (err != iosize)
 			break;
--- linux-2.3.48-pre2/fs/buffer.c.~1~	Fri Feb 25 10:08:49 2000
+++ linux-2.3.48-pre2/fs/buffer.c	Fri Feb 25 15:17:17 2000
@@ -1754,10 +1754,10 @@
 	mark_buffer_uptodate(bh, uptodate);
 
 	kiobuf = bh->b_kiobuf;
-	if (!uptodate)
-		kiobuf->errno = -EIO;
-	if (atomic_dec_and_test(&kiobuf->io_count))
-		kiobuf->end_io(kiobuf);
+	unlock_buffer(bh);
+	
+	end_kio_request(kiobuf, uptodate);
 }
 
 
@@ -1766,8 +1766,7 @@
  * for them to complete.  Clean up the buffer_heads afterwards.  
  */
 
-static int do_kio(struct kiobuf *kiobuf,
-		  int rw, int nr, struct buffer_head *bh[], int size)
+static int do_kio(int rw, int nr, struct buffer_head *bh[], int size)
 {
 	int iosize;
 	int i;
@@ -1778,18 +1777,20 @@
 
 	if (rw == WRITE)
 		rw = WRITERAW;
-	atomic_add(nr, &kiobuf->io_count);
-	kiobuf->errno = 0;
 	ll_rw_block(rw, nr, bh);
 
-	kiobuf_wait_for_io(kiobuf);
-	
-	spin_lock(&unused_list_lock);
-	
 	iosize = 0;
+	spin_lock(&unused_list_lock);
+
 	for (i = nr; --i >= 0; ) {
 		iosize += size;
 		tmp = bh[i];
+		if (buffer_locked(tmp)) {
+			spin_unlock(&unused_list_lock);
+			wait_on_buffer(tmp);
+			spin_lock(&unused_list_lock);
+		}
+		
 		if (!buffer_uptodate(tmp)) {
 			/* We are traversing bh'es in reverse order so
                            clearing iosize on error calculates the
@@ -1801,11 +1802,7 @@
 	
 	spin_unlock(&unused_list_lock);
 
-	if (iosize)
-		return iosize;
-	if (kiobuf->errno)
-		return kiobuf->errno;
-	return -EIO;
+	return iosize;
 }
 
 /*
@@ -1847,8 +1844,6 @@
 		if ((iobuf->offset & (size-1)) ||
 		    (iobuf->length & (size-1)))
 			return -EINVAL;
-		if (!iobuf->locked)
-			panic("brw_kiovec: iobuf not locked for I/O");
 		if (!iobuf->nr_pages)
 			panic("brw_kiovec: iobuf not initialised");
 	}
@@ -1861,10 +1856,15 @@
 		iobuf = iovec[i];
 		offset = iobuf->offset;
 		length = iobuf->length;
-
+		iobuf->errno = 0;
+		
 		for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
 			map  = iobuf->maplist[pageind];
-
+			if (!map) {
+				err = -EFAULT;
+				goto error;
+			}
+			
 			while (length > 0) {
 				blocknr = b[bufind++];
 				tmp = get_unused_buffer_head(0);
@@ -1893,11 +1893,13 @@
 				length -= size;
 				offset += size;
 
+				atomic_inc(&iobuf->io_count);
+	
 				/* 
 				 * Start the IO if we have got too much 
 				 */
 				if (bhind >= KIO_MAX_SECTORS) {
-					err = do_kio(iobuf, rw, bhind, bh, size);
+					err = do_kio(rw, bhind, bh, size);
 					if (err >= 0)
 						transferred += err;
 					else
@@ -1915,7 +1917,7 @@
 
 	/* Is there any IO still left to submit? */
 	if (bhind) {
-		err = do_kio(iobuf, rw, bhind, bh, size);
+		err = do_kio(rw, bhind, bh, size);
 		if (err >= 0)
 			transferred += err;
 		else
--- linux-2.3.48-pre2/fs/iobuf.c.~1~	Sun Jan 23 20:34:33 2000
+++ linux-2.3.48-pre2/fs/iobuf.c	Fri Feb 25 15:17:17 2000
@@ -12,18 +12,21 @@
 
 static kmem_cache_t *kiobuf_cachep;
 
-/*
- * The default IO completion routine for kiobufs: just wake up
- * the kiobuf, nothing more.  
- */
 
-void simple_wakeup_kiobuf(struct kiobuf *kiobuf)
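+/*
+ * IO completion routine for kiobuf-based IO: called once per submitted
+ * buffer_head.  Any error is recorded in the kiobuf, and once the last
+ * outstanding buffer completes we run the optional end_io callback and
+ * wake up anyone sleeping in kiobuf_wait_for_io().
+ */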
+void end_kio_request(struct kiobuf *kiobuf, int uptodate)
 {
-	wake_up(&kiobuf->wait_queue);
+	if ((!uptodate) && !kiobuf->errno)
+		kiobuf->errno = -EIO;
+
+	if (atomic_dec_and_test(&kiobuf->io_count)) {
+		if (kiobuf->end_io)
+			kiobuf->end_io(kiobuf);
+		wake_up(&kiobuf->wait_queue);
+	}
 }
 
 
-void __init kiobuf_init(void)
+void __init kiobuf_setup(void)
 {
 	kiobuf_cachep =  kmem_cache_create("kiobuf",
 					   sizeof(struct kiobuf),
@@ -33,6 +36,13 @@
 		panic("Cannot create kernel iobuf cache\n");
 }
 
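+/*
+ * Initialise a caller-provided kiobuf, for example one embedded in a
+ * driver's own data structures (see Documentation/kiobuf.sample.c).
+ * alloc_kiovec() below uses this for each kiobuf it allocates.
+ */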
+void kiobuf_init(struct kiobuf *iobuf)
+{
+	memset(iobuf, 0, sizeof(*iobuf));
+	init_waitqueue_head(&iobuf->wait_queue);
+	iobuf->array_len = KIO_STATIC_PAGES;
+	iobuf->maplist   = iobuf->map_array;
+}
 
 int alloc_kiovec(int nr, struct kiobuf **bufp)
 {
@@ -45,12 +55,7 @@
 			free_kiovec(i, bufp);
 			return -ENOMEM;
 		}
-		
-		memset(iobuf, 0, sizeof(*iobuf));
-		init_waitqueue_head(&iobuf->wait_queue);
-		iobuf->end_io = simple_wakeup_kiobuf;
-		iobuf->array_len = KIO_STATIC_PAGES;
-		iobuf->maplist   = iobuf->map_array;
+		kiobuf_init(iobuf);
 		*bufp++ = iobuf;
 	}
 	
@@ -64,6 +69,8 @@
 	
 	for (i = 0; i < nr; i++) {
 		iobuf = bufp[i];
+		if (iobuf->locked)
+			unlock_kiovec(1, &iobuf);
 		if (iobuf->array_len > KIO_STATIC_PAGES)
 			kfree (iobuf->maplist);
 		kmem_cache_free(kiobuf_cachep, bufp[i]);
@@ -103,6 +110,9 @@
 {
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
+
+	if (atomic_read(&kiobuf->io_count) == 0)
+		return;
 
 	add_wait_queue(&kiobuf->wait_queue, &wait);
 repeat:
--- linux-2.3.48-pre2/include/linux/iobuf.h.~1~	Fri Feb 25 14:59:49 2000
+++ linux-2.3.48-pre2/include/linux/iobuf.h	Fri Feb 25 15:17:17 2000
@@ -29,6 +29,8 @@
 #define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
 #define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
 
+/* The main kiobuf struct used for all our IO! */
+
 struct kiobuf 
 {
 	int		nr_pages;	/* Pages actually referenced */
@@ -46,7 +48,6 @@
 	unsigned int	locked : 1;	/* If set, pages has been locked */
 	
 	/* Always embed enough struct pages for 64k of IO */
-	unsigned long	page_array[KIO_STATIC_PAGES];
 	struct page *	map_array[KIO_STATIC_PAGES];
 
 	/* Dynamic state for IO completion: */
@@ -57,14 +58,51 @@
 };
 
 
+/* For true mmap() of kiobufs, we need to be able to count the number
+ * of vmas accessing the kiobuf so that we can clean up properly once
+ * the entire kiobuf is no longer being accessed.
+ *
+ * The kvmap semaphore is necessary to synchronise reference counting in
+ * all cases.  It is not sufficient to rely on the mm semaphore for
+ * this, as vmap references are inherited over fork() and we need to do
+ * the right thing for vmaps which end up shared as a result.
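+ *
+ * Rough setup sketch (Documentation/kiobuf.sample.c has the complete
+ * example; the callback name here is only illustrative):
+ *
+ *	kvmap_init(&vmap, &kiobuf);
+ *	vmap.deref_callback = my_last_unmap_fn;
+ *
+ * my_last_unmap_fn() is then called, with vmap.sem held, once the last
+ * user mapping of the kiobuf goes away.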
+ */
+
+struct kiobuf_vmap;
+typedef void kvmap_deref_fn (struct kiobuf_vmap *);
+
+struct kiobuf_vmap 
+{
+	struct kiobuf * kiobuf;
+	void *		private_data;
+	
+	struct semaphore sem;
+
+	/* The following are always protected by the semaphore above */
+	int		refcount;
+	kvmap_deref_fn	*deref_callback;
+};
+
+
 /* mm/memory.c */
 
 int	map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
+int	map_kernel_kiobuf(struct kiobuf *, unsigned long va, size_t len);
 void	unmap_kiobuf(struct kiobuf *iobuf);
+int	lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
+int	unlock_kiovec(int nr, struct kiobuf *iovec[]);
+
+/* mm/iomap.c */
+
+#define KIOMAP_PREFAULT		0x0001
+int mmap_kiobuf(struct kiobuf_vmap *iobuf, struct vm_area_struct * vma);
+void kvmap_init(struct kiobuf_vmap *, struct kiobuf *);
 
 /* fs/iobuf.c */
 
-void __init kiobuf_init(void);
+void __init kiobuf_setup(void);
+void	kiobuf_init(struct kiobuf *);
+void	end_kio_request(struct kiobuf *, int);
-void	simple_wakeup_kiobuf(struct kiobuf *);
 int	alloc_kiovec(int nr, struct kiobuf **);
 void	free_kiovec(int nr, struct kiobuf **);
--- linux-2.3.48-pre2/init/main.c.~1~	Thu Feb 17 11:50:55 2000
+++ linux-2.3.48-pre2/init/main.c	Fri Feb 25 15:17:17 2000
@@ -534,7 +534,7 @@
 	vma_init();
 	buffer_init(mempages);
 	page_cache_init(mempages);
-	kiobuf_init();
+	kiobuf_setup();
 	signals_init();
 	bdev_init();
 	inode_init();
--- linux-2.3.48-pre2/kernel/ksyms.c.~1~	Fri Feb 25 10:08:50 2000
+++ linux-2.3.48-pre2/kernel/ksyms.c	Fri Feb 25 15:17:17 2000
@@ -43,6 +43,7 @@
 #include <linux/mm.h>
 #include <linux/capability.h>
 #include <linux/highuid.h>
+#include <linux/iobuf.h>
 
 #if defined(CONFIG_PROC_FS)
 #include <linux/proc_fs.h>
@@ -156,11 +157,6 @@
 EXPORT_SYMBOL(mark_buffer_dirty);
 EXPORT_SYMBOL(__mark_buffer_dirty);
 EXPORT_SYMBOL(__mark_inode_dirty);
-EXPORT_SYMBOL(free_kiovec);
-EXPORT_SYMBOL(brw_kiovec);
-EXPORT_SYMBOL(alloc_kiovec);
-EXPORT_SYMBOL(expand_kiobuf);
-EXPORT_SYMBOL(unmap_kiobuf);
 EXPORT_SYMBOL(get_empty_filp);
 EXPORT_SYMBOL(init_private_file);
 EXPORT_SYMBOL(filp_open);
@@ -339,6 +335,23 @@
 /* Various random spinlocks we want to export */
 EXPORT_SYMBOL(tqueue_lock);
 #endif
+
+/* Kiobufs */
+EXPORT_SYMBOL(kiobuf_init);
+EXPORT_SYMBOL(kvmap_init);
+
+EXPORT_SYMBOL(alloc_kiovec);
+EXPORT_SYMBOL(free_kiovec);
+EXPORT_SYMBOL(expand_kiobuf);
+
+EXPORT_SYMBOL(map_user_kiobuf);
+EXPORT_SYMBOL(map_kernel_kiobuf);
+EXPORT_SYMBOL(unmap_kiobuf);
+EXPORT_SYMBOL(mmap_kiobuf);
+
+EXPORT_SYMBOL(lock_kiovec);
+EXPORT_SYMBOL(unlock_kiovec);
+EXPORT_SYMBOL(brw_kiovec);
 
 /* autoirq from  drivers/net/auto_irq.c */
 EXPORT_SYMBOL(autoirq_setup);
--- linux-2.3.48-pre2/mm/Makefile.~1~	Fri Dec 10 15:24:41 1999
+++ linux-2.3.48-pre2/mm/Makefile	Fri Feb 25 15:17:17 2000
@@ -10,7 +10,7 @@
 O_TARGET := mm.o
 O_OBJS	 := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
 	    vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
-	    page_alloc.o swap_state.o swapfile.o numa.o
+	    page_alloc.o swap_state.o swapfile.o numa.o iomap.o
 
 ifeq ($(CONFIG_HIGHMEM),y)
 O_OBJS += highmem.o
--- linux-2.3.48-pre2/mm/iomap.c.~1~	Fri Feb 25 15:17:17 2000
+++ linux-2.3.48-pre2/mm/iomap.c	Fri Feb 25 15:17:17 2000
@@ -0,0 +1,144 @@
+/*
+ * iomap.c
+ *
+ * Perform mmap()ing of arbitrary kiobufs.  
+ *
+ * Written by Stephen C. Tweedie, 2000
+ * (C) Red Hat, Inc.  2000
+ *
+ * Refer to Documentation/kiobuf* for instructions.
+ *
+ * The kiobuf_vmap structure contains the information necessary to track
+ * the mmap of a kiobuf into user space.  The rest of this file defines
+ * functions necessary to maintain that mapping.
+ */
+
+#include <linux/iobuf.h>
+#include <linux/pagemap.h>
+#include <asm/atomic.h>
+
+#define dprintk(x...)
+
+/*
+ * Open/close methods for the kvmap only need to track the reference counts. 
+ *
+ * Both open and close will be called with the vma mm semaphore held,
+ * but without the mm page lock.
+ */
+
+static void kvmap_open(struct vm_area_struct *vma) 
+{
+	struct kiobuf_vmap *vmap;
+	vmap = (struct kiobuf_vmap *) vma->vm_private_data;
+
+	/* Just increment the refcount on open: there has been an unmap
+	 * or fork increasing the number of vmas on this kvmap. */
+	down(&vmap->sem);
+	vmap->refcount++;
+	up(&vmap->sem);
+}
+
+static void kvmap_close(struct vm_area_struct *vma) 
+{
+	struct kiobuf_vmap *vmap;
+	vmap = (struct kiobuf_vmap *) vma->vm_private_data;
+	
+	down(&vmap->sem);
+	if (--vmap->refcount == 0) {
+		if (vmap->deref_callback)
+			vmap->deref_callback(vmap);
+	}
+	up(&vmap->sem);
+}
+
+
+static struct page * kvmap_nopage(struct vm_area_struct * vma,
+				  unsigned long address, 
+				  int no_share)
+{
+	unsigned long pgoff;
+	struct kiobuf_vmap *vmap;
+	struct kiobuf *iobuf;
+	struct page *page;
+
+	vmap = (struct kiobuf_vmap *) vma->vm_private_data;
+	iobuf = vmap->kiobuf;
+	pgoff = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+	dprintk(KERN_INFO __FUNCTION__ "(%p, %p, %d): offset %lu\n",
+	       vma, (void *) address, no_share, pgoff);
+	
+	if (no_share)
+		BUG();
+
+	/* mmap() of a larger region than specified by the kiobuf
+	 * results in a SIGBUS, as does faulting on a page not present
+	 * in the kiobuf. */
+	
+	if (pgoff >= iobuf->nr_pages) {
+		dprintk(KERN_INFO __FUNCTION__ ": no such page in iobuf\n");
+		return NULL;
+	}
+	
+	page = iobuf->maplist[pgoff];
+	dprintk(KERN_INFO __FUNCTION__ ": found page %p\n", page);
+	if (!page)
+		return NULL;
+	
+	/* We need to obey the same rules as copy_page_range() when it
+	 * comes to maintaining reference counts on pages in the kvmap.
+	 * We only increment the refcount for normal, physically
+	 * present, unreserved pages. */
+	
+	if ((page-mem_map) < max_mapnr && ! PageReserved(page)) {
+		get_page(page);
+		dprintk(KERN_INFO __FUNCTION__ ": page count now %d\n",
+		       atomic_read(&page->count));
+	}
+
+	return page;
+}
+
+
+static struct vm_operations_struct kio_vmops = 
+{
+	open:	kvmap_open,
+	close:	kvmap_close,
+	nopage:	kvmap_nopage,
+};
+
+void kvmap_init(struct kiobuf_vmap *vmap, struct kiobuf *iobuf)
+{
+	memset(vmap, 0, sizeof(*vmap));
+	vmap->kiobuf = iobuf;
+	init_MUTEX(&vmap->sem);
+}
+
+/*
+ * This routine is intended to be called by the mmap_* methods of other
+ * device drivers.  
+ * 
+ * Returns <0 on error, 1 if this is the first reference to the vmap, else 0.
+ *
+ * The vmap semaphore must be held before calling this.
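+ *
+ * A typical caller (sketch, mirroring kiomap_mmap() in
+ * Documentation/kiobuf.sample.c) looks like:
+ *
+ *	down(&vmap->sem);
+ *	err = mmap_kiobuf(vmap, vma);
+ *	if (err == 1)
+ *		MOD_INC_USE_COUNT;
+ *	up(&vmap->sem);
+ *	if (err < 0)
+ *		return err;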
+ */
+
+int mmap_kiobuf(struct kiobuf_vmap *vmap, struct vm_area_struct * vma) 
+{
+	int retval = 0;
+	
+	dprintk(KERN_INFO __FUNCTION__ "(vmap %p, vma %p)\n", vmap, vma);
+
+	vma->vm_ops = &kio_vmops;
+	vma->vm_private_data = vmap;
+	vma->vm_flags |= VM_LOCKED;	/* Don't swap out kvmaps! */
+	
+	/* This is supposed to be a shared map --- reject COW mappings. */
+	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vmap->refcount++;
+	if (vmap->refcount == 1)
+		retval = 1;
+
+	return retval;
+}
--- linux-2.3.48-pre2/mm/memory.c.~1~	Fri Feb 25 10:08:50 2000
+++ linux-2.3.48-pre2/mm/memory.c	Fri Feb 25 15:17:17 2000
@@ -408,28 +408,25 @@
 			return pte_page(*pte);
 	}
 	
-	printk(KERN_ERR "Missing page in follow_page\n");
 	return NULL;
 }
 
 /* 
- * Given a physical address, is there a useful struct page pointing to it?
+ * Given a physical address, is there a useful struct page pointing to
+ * it?  This may become more complex in the future if we start dealing
+ * with IO-aperture pages in kiobufs.
  */
 
-struct page * get_page_map(struct page *page, unsigned long vaddr)
+static inline struct page * get_page_map(struct page *page)
 {
-	if (MAP_NR(vaddr) >= max_mapnr)
-		return 0;
-	if (page == ZERO_PAGE(vaddr))
-		return 0;
-	if (PageReserved(page))
+	if (page >= (mem_map + max_mapnr))
 		return 0;
 	return page;
 }
 
 /*
  * Force in an entire range of pages from the current process's user VA,
- * and pin and lock the pages for IO.  
+ * and pin them in physical memory.  
  */
 
 #define dprintk(x...)
@@ -440,8 +437,6 @@
 	struct mm_struct *	mm;
 	struct vm_area_struct *	vma = 0;
 	struct page *		map;
-	int			doublepage = 0;
-	int			repeat = 0;
 	int			i;
 	
 	/* Make sure the iobuf is not already mapped somewhere. */
@@ -457,11 +452,10 @@
 	if (err)
 		return err;
 
- repeat:
 	down(&mm->mmap_sem);
 
 	err = -EFAULT;
-	iobuf->locked = 1;
+	iobuf->locked = 0;
 	iobuf->offset = va & ~PAGE_MASK;
 	iobuf->length = len;
 	
@@ -481,16 +475,15 @@
 		spin_lock(&mm->page_table_lock);
 		map = follow_page(ptr);
 		if (!map) {
+			spin_unlock(&mm->page_table_lock);
 			dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
-			goto retry;
+			goto out_unlock;
 		}
-		map = get_page_map(map, ptr);
-		if (map) {
-			if (TryLockPage(map)) {
-				goto retry;
-			}
+		map = get_page_map(map);
+		if (map)
 			atomic_inc(&map->count);
-		}
+		else
+			printk (KERN_INFO "Mapped page missing [%d]\n", i);
 		spin_unlock(&mm->page_table_lock);
 		iobuf->maplist[i] = map;
 		iobuf->nr_pages = ++i;
@@ -507,42 +500,110 @@
 	unmap_kiobuf(iobuf);
 	dprintk ("map_user_kiobuf: end %d\n", err);
 	return err;
+}
 
- retry:
 
-	/* 
-	 * Undo the locking so far, wait on the page we got to, and try again.
-	 */
-	spin_unlock(&mm->page_table_lock);
-	unmap_kiobuf(iobuf);
-	up(&mm->mmap_sem);
+/*
+ * Force in an entire range of pages from the current process's kernel
+ * VA.  We do not expect to see unmapped pages here, so no page faults
+ * will be taken.  The map_kernel_kiobuf() routine should work happily
+ * both for normal kernel allocations and for vmalloc()ed regions.  
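+ *
+ * One possible use, following Documentation/kiobuf.sample.c (sketch):
+ *
+ *	area = vmalloc(size);
+ *	kiobuf_init(&iobuf);
+ *	err = map_kernel_kiobuf(&iobuf, (unsigned long) area, size);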
+ */
 
-	/* 
-	 * Did the release also unlock the page we got stuck on?
-	 */
-	if (map) {
-		if (!PageLocked(map)) {
-			/* If so, we may well have the page mapped twice
-			 * in the IO address range.  Bad news.  Of
-			 * course, it _might_ * just be a coincidence,
-			 * but if it happens more than * once, chances
-			 * are we have a double-mapped page. */
-			if (++doublepage >= 3) {
-				return -EINVAL;
-			}
+static inline int map_pte_range(struct page **pmap, pmd_t * pmd, unsigned long va, unsigned long end)
+{
+	pte_t * pte;
+	int nr_pages = 0;
+	struct page *page;
+	
+	if (pmd_none(*pmd))
+		return 0;	
+
+	pte = pte_offset(pmd, va);
+
+	do {
+		if (pte_none(*pte))
+			page = NULL;
+		else {
+			page = pte_page(*pte);
+			atomic_inc(&page->count);
 		}
+		*pmap++ = page;
+
+		va += PAGE_SIZE;
+		pte++;
+		nr_pages++;
+	} while (va < end && (va & ~PMD_MASK));
 	
-		/*
-		 * Try again...
-		 */
-		wait_on_page(map);
-	}
+	return nr_pages;
+}
+
+static inline int map_pmd_range(struct page **pmap, pgd_t * dir, unsigned long va, unsigned long end)
+{
+	pmd_t * pmd;
+	int total_pages = 0;
+	int nr_pages;
 	
-	if (++repeat < 16) {
-		ptr = va & PAGE_MASK;
-		goto repeat;
+	if (pgd_none(*dir))
+		return 0;
+	pmd = pmd_offset(dir, va);
+	
+	do {
+		nr_pages = map_pte_range(pmap, pmd, va, end);
+		pmd++;
+		pmap += nr_pages;
+		va += (nr_pages << PAGE_SHIFT);
+		total_pages += nr_pages;
+	} while (nr_pages && va < end && (va & ~PGDIR_MASK));
+	return total_pages;
+}
+
+int map_kernel_kiobuf(struct kiobuf *iobuf, unsigned long va, size_t len)
+{
+	unsigned long		ptr, end;
+	int			err;
+	struct mm_struct *	mm;
+	struct page **		pmap;
+	int			nr_pages;
+	pgd_t *			dir;
+	
+	/* Make sure the iobuf is not already mapped somewhere. */
+	if (iobuf->nr_pages)
+		return -EINVAL;
+
+	mm = current->mm;
+	dprintk ("map_kernel_kiobuf: begin\n");
+	
+	ptr = va & PAGE_MASK;
+	end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
+	err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
+	if (err)
+		return err;
+
+	err = -EFAULT;
+	iobuf->locked = 0;
+	iobuf->nr_pages = 0;
+	iobuf->offset = va & ~PAGE_MASK;
+	iobuf->length = len;
+	
+	spin_lock(&mm->page_table_lock);
+
+	dir = pgd_offset(mm, va);
+	pmap = iobuf->maplist;
+	
+	while (ptr < end) {
+		nr_pages = map_pmd_range(pmap, dir, ptr, end);
+		if (!nr_pages) 
+			break;
+		dir++;
+		ptr += (nr_pages << PAGE_SHIFT);
+		pmap += nr_pages;
+		iobuf->nr_pages += nr_pages;
 	}
-	return -EAGAIN;
+	
+	spin_unlock(&mm->page_table_lock);
+	dprintk ("map_kernel_kiobuf: end OK\n");
+	return 0;
 }
 
 
@@ -558,9 +619,9 @@
 	
 	for (i = 0; i < iobuf->nr_pages; i++) {
 		map = iobuf->maplist[i];
-		
-		if (map && iobuf->locked) {
-			UnlockPage(map);
+		if (map) {
+			if (iobuf->locked)
+				UnlockPage(map);
 			__free_page(map);
 		}
 	}
@@ -568,6 +629,109 @@
 	iobuf->nr_pages = 0;
 	iobuf->locked = 0;
 }
+
+/*
+ * Lock down all of the pages of a kiovec for IO.
+ *
+ * If any page is mapped twice in the kiovec, we return the error -EINVAL.
+ *
+ * The optional wait parameter causes the lock call to block until all
+ * pages can be locked if set.  If wait==0, the lock operation is
+ * aborted if any locked pages are found and -EAGAIN is returned.
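+ *
+ * Expected calling pattern (sketch; compare drivers/char/raw.c):
+ *
+ *	err = lock_kiovec(1, &iobuf, 1);
+ *	if (!err) {
+ *		... perform the IO ...
+ *		unlock_kiovec(1, &iobuf);
+ *	}
+ *
+ * unmap_kiobuf() and free_kiovec() will also unlock any pages still
+ * locked, so the explicit unlock_kiovec() is optional before them.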
+ */
+
+int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
+{
+	struct kiobuf *iobuf;
+	int i, j;
+	struct page *page, **ppage;
+	int doublepage = 0;
+	int repeat = 0;
+	
+ repeat:
+	
+	for (i = 0; i < nr; i++) {
+		iobuf = iovec[i];
+
+		if (iobuf->locked)
+			continue;
+		iobuf->locked = 1;
+
+		ppage = iobuf->maplist;
+		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
+			page = *ppage;
+			if (!page)
+				continue;
+			
+			if (TryLockPage(page))
+				goto retry;
+		}
+	}
+
+	return 0;
+	
+ retry:
+	
+	/* 
+	 * We couldn't lock one of the pages.  Undo the locking so far,
+	 * wait on the page we got to, and try again.  
+	 */
+	
+	unlock_kiovec(nr, iovec);
+	if (!wait)
+		return -EAGAIN;
+	
+	/* 
+	 * Did the release also unlock the page we got stuck on?
+	 */
+	if (!PageLocked(page)) {
+		/* 
+		 * If so, we may well have the page mapped twice
+		 * in the IO address range.  Bad news.  Of
+		 * course, it _might_ just be a coincidence,
+		 * but if it happens more than once, chances
+		 * are we have a double-mapped page. 
+		 */
+		if (++doublepage >= 3) 
+			return -EINVAL;
+		
+		/* Try again...  */
+		wait_on_page(page);
+	}
+	
+	if (++repeat < 16)
+		goto repeat;
+	return -EAGAIN;
+}
+
+/*
+ * Unlock all of the pages of a kiovec after IO.
+ */
+
+int unlock_kiovec(int nr, struct kiobuf *iovec[])
+{
+	struct kiobuf *iobuf;
+	int i, j;
+	struct page *page, **ppage;
+	
+	for (i = 0; i < nr; i++) {
+		iobuf = iovec[i];
+
+		if (!iobuf->locked)
+			continue;
+		iobuf->locked = 0;
+		
+		ppage = iobuf->maplist;
+		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
+			page = *ppage;
+			if (!page)
+				continue;
+			UnlockPage(page);
+		}
+	}
+	return 0;
+}
+
 
 static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
                                      unsigned long size, pgprot_t prot)
--- linux-2.3.48-pre2/mm/vmscan.c.~1~	Thu Feb 17 11:50:53 2000
+++ linux-2.3.48-pre2/mm/vmscan.c	Fri Feb 25 15:17:17 2000
@@ -259,7 +259,7 @@
 	unsigned long end;
 
 	/* Don't swap out areas which are locked down */
-	if (vma->vm_flags & VM_LOCKED)
+	if (vma->vm_flags & (VM_LOCKED | VM_IO))
 		return 0;
 
 	pgdir = pgd_offset(vma->vm_mm, address);
