
From: Manfred Spraul <manfred@colorfullife.com>

I coded a poor man's magazine layer for slab: +13% performance for
Robert's packet routing. Unfortunately, this means even more arrays.
Could you look at the patch? It's not yet final; I want to make the size
of the shared array tunable (replace SHARED_ARRAY_FACTOR with a
kmem_cache_t member). Perhaps it should even default to 0; I must think
about it a bit more.

The size of the new shared array may be altered at runtime by writing to
/proc/slabinfo, as shown in the example below.
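
For example (the cache name and numbers below are illustrative, pick any
cache listed in /proc/slabinfo); the three values after the cache name are
limit, batchcount and shared, matching the sscanf() in slabinfo_write():

	echo "size-1024 120 60 8" > /proc/slabinfo

The shared array holds up to batchcount*shared objects per cache (that is
how do_tune_cpucache() sizes it), so writing 0 for shared effectively
disables it.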

Additionally, the patch reduces the default sizes of the per-cpu array
caches (see the new limit values in enable_cpucache()).

TODO:

a) Disable the magazine-vs-head array duplication on UP.

b) Remove the hard-wired constant.

c) Some way for the slab owner to tune the magazine size?


 mm/slab.c |  182 +++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 139 insertions(+), 43 deletions(-)

diff -puN mm/slab.c~slab-magazine-layer mm/slab.c
--- 25/mm/slab.c~slab-magazine-layer	2003-05-23 18:44:13.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-05-23 18:44:34.000000000 -0700
@@ -208,6 +208,7 @@ struct kmem_list3 {
 	unsigned long	free_objects;
 	int		free_touched;
 	unsigned long	next_reap;
+	struct array_cache	*shared;
 };
 
 #define LIST3_INIT(parent) \
@@ -1195,6 +1196,7 @@ static void smp_call_function_all_cpus(v
 }
 
 static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+static void drain_array_locked(kmem_cache_t* cachep, struct array_cache *ac);
 
 static void do_drain(void *arg)
 {
@@ -1203,13 +1205,20 @@ static void do_drain(void *arg)
 
 	check_irq_off();
 	ac = ac_data(cachep);
+	spin_lock(&cachep->spinlock);
 	free_block(cachep, &ac_entry(ac)[0], ac->avail);
+	spin_unlock(&cachep->spinlock);
 	ac->avail = 0;
 }
 
 static void drain_cpu_caches(kmem_cache_t *cachep)
 {
 	smp_call_function_all_cpus(do_drain, cachep);
+	check_irq_on();
+	spin_lock_irq(&cachep->spinlock);
+	if (cachep->lists.shared)
+		drain_array_locked(cachep, cachep->lists.shared);
+	spin_unlock_irq(&cachep->spinlock);
 }
 
 
@@ -1307,6 +1316,8 @@ int kmem_cache_destroy (kmem_cache_t * c
 		for (i = 0; i < NR_CPUS; i++)
 			kfree(cachep->array[i]);
 		/* NUMA: free the list3 structures */
+		kfree(cachep->lists.shared);
+		cachep->lists.shared = NULL;
 	}
 	kmem_cache_free(&cache_cache, cachep);
 
@@ -1649,6 +1660,19 @@ retry:
 
 	BUG_ON(ac->avail > 0);
 	spin_lock(&cachep->spinlock);
+	if (l3->shared) {
+		struct array_cache *shared_array = l3->shared;
+		if (shared_array->avail) {
+			if (batchcount > shared_array->avail)
+				batchcount = shared_array->avail;
+			shared_array->avail -= batchcount;
+			ac->avail = batchcount;
+			memcpy(ac_entry(ac), &ac_entry(shared_array)[shared_array->avail],
+					sizeof(void*)*batchcount);
+			shared_array->touched = 1;
+			goto alloc_done;
+		}
+	}
 	while (batchcount > 0) {
 		struct list_head *entry;
 		struct slab *slabp;
@@ -1672,6 +1696,7 @@ retry:
 
 must_grow:
 	l3->free_objects -= ac->avail;
+alloc_done:
 	spin_unlock(&cachep->spinlock);
 
 	if (unlikely(!ac->avail)) {
@@ -1770,13 +1795,11 @@ static inline void * __cache_alloc (kmem
  * the l3 structure
  */
 
-static inline void
-__free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
 {
 	int i;
 
-	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	check_spinlock_acquired(cachep);
 
 	/* NUMA: move add into loop */
 	cachep->lists.free_objects += nr_objects;
@@ -1814,12 +1837,6 @@ __free_block(kmem_cache_t *cachep, void 
 				&list3_data_ptr(cachep, objp)->slabs_partial);
 		}
 	}
-	spin_unlock(&cachep->spinlock);
-}
-
-static void free_block(kmem_cache_t* cachep, void** objpp, int len)
-{
-	__free_block(cachep, objpp, len);
 }
 
 static void cache_flusharray (kmem_cache_t* cachep, struct array_cache *ac)
@@ -1831,14 +1848,28 @@ static void cache_flusharray (kmem_cache
 	BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
 	check_irq_off();
-	__free_block(cachep, &ac_entry(ac)[0], batchcount);
+	spin_lock(&cachep->spinlock);
+	if (cachep->lists.shared) {
+		struct array_cache *shared_array = cachep->lists.shared;
+		int max = shared_array->limit-shared_array->avail;
+		if (max) {
+			if (batchcount > max)
+				batchcount = max;
+			memcpy(&ac_entry(shared_array)[shared_array->avail],
+					&ac_entry(ac)[0],
+					sizeof(void*)*batchcount);
+			shared_array->avail += batchcount;
+			goto free_done;
+		}
+	}
 
+	free_block(cachep, &ac_entry(ac)[0], batchcount);
+free_done:
 #if STATS
 	{
 		int i = 0;
 		struct list_head *p;
 
-		spin_lock(&cachep->spinlock);
 		p = list3_data(cachep)->slabs_free.next;
 		while (p != &(list3_data(cachep)->slabs_free)) {
 			struct slab *slabp;
@@ -1850,9 +1881,9 @@ static void cache_flusharray (kmem_cache
 			p = p->next;
 		}
 		STATS_SET_FREEABLE(cachep, i);
-		spin_unlock(&cachep->spinlock);
 	}
 #endif
+	spin_unlock(&cachep->spinlock);
 	ac->avail -= batchcount;
 	memmove(&ac_entry(ac)[0], &ac_entry(ac)[batchcount],
 			sizeof(void*)*ac->avail);
@@ -2086,9 +2117,10 @@ static void do_ccupdate_local(void *info
 }
 
 
-static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
+static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount, int shared)
 {
 	struct ccupdate_struct new;
+	struct array_cache *new_shared;
 	int i;
 
 	memset(&new.new,0,sizeof(new.new));
@@ -2122,11 +2154,29 @@ static int do_tune_cpucache (kmem_cache_
 		struct array_cache *ccold = new.new[i];
 		if (!ccold)
 			continue;
-		local_irq_disable();
+		spin_lock_irq(&cachep->spinlock);
 		free_block(cachep, ac_entry(ccold), ccold->avail);
-		local_irq_enable();
+		spin_unlock_irq(&cachep->spinlock);
 		kfree(ccold);
 	}
+	new_shared = kmalloc(sizeof(void*)*batchcount*shared+
+				sizeof(struct array_cache), GFP_KERNEL);
+	if (new_shared) {
+		struct array_cache *old;
+		new_shared->avail = 0;
+		new_shared->limit = batchcount*shared;
+		new_shared->batchcount = 0xbaadf00d;
+		new_shared->touched = 0;
+
+		spin_lock_irq(&cachep->spinlock);
+		old = cachep->lists.shared;
+		cachep->lists.shared = new_shared;
+		if (old)
+			free_block(cachep, ac_entry(old), old->avail);
+		spin_unlock_irq(&cachep->spinlock);
+		kfree(old);
+	}
+
 	return 0;
 }
 
@@ -2134,7 +2184,7 @@ static int do_tune_cpucache (kmem_cache_
 static void enable_cpucache (kmem_cache_t *cachep)
 {
 	int err;
-	int limit;
+	int limit, shared;
 
 	/* The head array serves three purposes:
 	 * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -2149,11 +2199,25 @@ static void enable_cpucache (kmem_cache_
 	else if (cachep->objsize > PAGE_SIZE)
 		limit = 8;
 	else if (cachep->objsize > 1024)
-		limit = 54;
+		limit = 24;
 	else if (cachep->objsize > 256)
-		limit = 120;
+		limit = 54;
 	else
-		limit = 248;
+		limit = 120;
+
+	/* Cpu bound tasks (e.g. network routing) can exhibit cpu bound
+	 * allocation behaviour: Most allocs on one cpu, most free operations
+	 * on another cpu. For these cases, an efficient object passing between
+	 * cpus is necessary. This is provided by a shared array. The array
+	 * replaces Bonwick's magazine layer.
+	 * On uniprocessor, it's functionally equivalent (but less efficient)
+	 * to a larger limit. Thus disabled by default.
+	 */
+	shared = 0;
+#ifdef CONFIG_SMP
+	if (cachep->objsize <= PAGE_SIZE)
+		shared = 8;
+#endif
 
 #if DEBUG
 	/* With debugging enabled, large batchcount lead to excessively
@@ -2163,12 +2227,53 @@ static void enable_cpucache (kmem_cache_
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit+1)/2);
+	err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 					cachep->name, -err);
 }
 
+static void drain_array(kmem_cache_t *cachep, struct array_cache *ac)
+{
+	int tofree;
+
+	check_irq_off();
+	if (ac->touched) {
+		ac->touched = 0;
+	} else if (ac->avail) {
+		tofree = (ac->limit+4)/5;
+		if (tofree > ac->avail) {
+			tofree = (ac->avail+1)/2;
+		}
+		spin_lock(&cachep->spinlock);
+		free_block(cachep, ac_entry(ac), tofree);
+		spin_unlock(&cachep->spinlock);
+		ac->avail -= tofree;
+		memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
+					sizeof(void*)*ac->avail);
+	}
+}
+
+static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac)
+{
+	int tofree;
+
+	check_spinlock_acquired(cachep);
+	if (ac->touched) {
+		ac->touched = 0;
+	} else if (ac->avail) {
+		tofree = (ac->limit+4)/5;
+		if (tofree > ac->avail) {
+			tofree = (ac->avail+1)/2;
+		}
+		free_block(cachep, ac_entry(ac), tofree);
+		ac->avail -= tofree;
+		memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
+					sizeof(void*)*ac->avail);
+	}
+}
+
+
 /**
  * cache_reap - Reclaim memory from caches.
  *
@@ -2195,7 +2300,6 @@ static inline void cache_reap (void)
 		kmem_cache_t *searchp;
 		struct list_head* p;
 		int tofree;
-		struct array_cache *ac;
 		struct slab *slabp;
 
 		searchp = list_entry(walk, kmem_cache_t, next);
@@ -2205,19 +2309,8 @@ static inline void cache_reap (void)
 
 		check_irq_on();
 		local_irq_disable();
-		ac = ac_data(searchp);
-		if (ac->touched) {
-			ac->touched = 0;
-		} else if (ac->avail) {
-			tofree = (ac->limit+4)/5;
-			if (tofree > ac->avail) {
-				tofree = (ac->avail+1)/2;
-			}
-			free_block(searchp, ac_entry(ac), tofree);
-			ac->avail -= tofree;
-			memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
-					sizeof(void*)*ac->avail);
-		}
+		drain_array(searchp, ac_data(searchp));
+
 		if(time_after(searchp->lists.next_reap, jiffies))
 			goto next_irqon;
 
@@ -2226,6 +2319,10 @@ static inline void cache_reap (void)
 			goto next_unlock;
 		}
 		searchp->lists.next_reap = jiffies + REAPTIMEOUT_LIST3;
+
+		if (searchp->lists.shared)
+			drain_array_locked(searchp, searchp->lists.shared);
+
 		if (searchp->lists.free_touched) {
 			searchp->lists.free_touched = 0;
 			goto next_unlock;
@@ -2435,7 +2532,7 @@ struct seq_operations slabinfo_op = {
 
 #define MAX_SLABINFO_WRITE 128
 /**
- * slabinfo_write - SMP tuning for the slab allocator
+ * slabinfo_write - Tuning for the slab allocator
  * @file: unused
  * @buffer: user buffer
  * @count: data len
@@ -2445,7 +2542,7 @@ ssize_t slabinfo_write(struct file *file
 				size_t count, loff_t *ppos)
 {
 	char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
-	int limit, batchcount, res;
+	int limit, batchcount, shared, res;
 	struct list_head *p;
 	
 	if (count > MAX_SLABINFO_WRITE)
@@ -2459,10 +2556,8 @@ ssize_t slabinfo_write(struct file *file
 		return -EINVAL;
 	*tmp = '\0';
 	tmp++;
-	limit = simple_strtol(tmp, &tmp, 10);
-	while (*tmp == ' ')
-		tmp++;
-	batchcount = simple_strtol(tmp, &tmp, 10);
+	if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
+		return -EINVAL;
 
 	/* Find the cache in the chain of caches. */
 	down(&cache_chain_sem);
@@ -2473,10 +2568,11 @@ ssize_t slabinfo_write(struct file *file
 		if (!strcmp(cachep->name, kbuf)) {
 			if (limit < 1 ||
 			    batchcount < 1 ||
-			    batchcount > limit) {
+			    batchcount > limit ||
+			    shared < 0) {
 				res = -EINVAL;
 			} else {
-				res = do_tune_cpucache(cachep, limit, batchcount);
+				res = do_tune_cpucache(cachep, limit, batchcount, shared);
 			}
 			break;
 		}

_
