diff -urN linux-2.4.18/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.4.18/arch/i386/config.in	Tue Aug 20 17:20:33 2002
+++ linux/arch/i386/config.in	Sat Aug 24 23:17:30 2002
@@ -105,6 +105,7 @@
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_PGE y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
 fi
 if [ "$CONFIG_MPENTIUM4" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 7
+   define_bool CONFIG_X86_USE_SMP_BALANCE y
diff -urN linux-2.4.18/include/asm-i386/processor.h linux/include/asm-i386/processor.h
--- linux-2.4.18/include/asm-i386/processor.h	Tue Aug 20 17:21:14 2002
+++ linux/include/asm-i386/processor.h	Sat Aug 24 23:17:30 2002
@@ -506,4 +506,6 @@
 
 #endif
 
+#define ARCH_HAS_SMP_BALANCE
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff -urN linux-2.4.18/include/asm-i386/smp_balance.h linux/include/asm-i386/smp_balance.h
--- linux-2.4.18/include/asm-i386/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/asm-i386/smp_balance.h	Sat Aug 24 23:20:22 2002
@@ -0,0 +1,70 @@
+#ifndef _ASM_SMP_BALANCE_H
+#define _ASM_SMP_BALANCE_H
+
+/*
+ * We have an architecture-specific SMP load balancer to improve
+ * scheduling behavior on hyperthreaded CPUs.  Since only P4s have
+ * HT, we use the code only if CONFIG_MPENTIUM4 is set.
+ *
+ * Distributions may want to make this unconditional to support all
+ * x86 machines on one kernel.  The overhead in the non-P4 case is
+ * minimal while the benefit to SMP P4s is probably decent.
+ */
+#if defined(CONFIG_X86_USE_SMP_BALANCE)
+
+/*
+ * Find an idle processor package, i.e. one where both virtual
+ * processors are idle.  Returns the physical id of one of the
+ * package's siblings, or -1 if no fully idle package exists.
+ */
+static inline int find_idle_package(int this_cpu)
+{
+	int i;
+
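+	/* convert to this CPU's logical number for the scan below */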
+	this_cpu = cpu_number_map(this_cpu);
+
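+	/*
+	 * Walk the other CPUs in logical order, starting just after
+	 * this one; cpu_sibling_map[] names each CPU's HT twin.
+	 */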
+	for (i = (this_cpu + 1) % smp_num_cpus;
+	     i != this_cpu;
+	     i = (i + 1) % smp_num_cpus) {
+		int physical = cpu_logical_map(i);
+		int sibling = cpu_sibling_map[physical];
+
+		if (idle_cpu(physical) && idle_cpu(sibling))
+			return physical;
+	}
+	return -1;	/* not found */
+}
+
+static inline int arch_load_balance(int this_cpu, int idle)
+{
+	/*
+	 * Special case for hyperthreading: if this CPU is idle but
+	 * its HT sibling is busy, pulling a task here would make it
+	 * compete with the sibling for the package's execution
+	 * resources.  Kick the idle task of a completely idle package
+	 * instead and let the load go there.
+	 */
+	if (unlikely(smp_num_siblings > 1 && idle &&
+		     !idle_cpu(cpu_sibling_map[this_cpu]))) {
+		int found;
+		struct runqueue *rq_target;
+
+		if ((found = find_idle_package(this_cpu)) >= 0) {
+			rq_target = cpu_rq(found);
+			resched_task(rq_target->idle);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#else
+#define arch_load_balance(x, y)		(0)
+#endif
+
+#endif /* _ASM_SMP_BALANCE_H */
diff -urN linux-2.4.18/include/linux/smp_balance.h linux/include/linux/smp_balance.h
--- linux-2.4.18/include/linux/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/linux/smp_balance.h	Sat Aug 24 23:14:25 2002
@@ -0,0 +1,19 @@
+#ifndef _LINUX_SMP_BALANCE_H
+#define _LINUX_SMP_BALANCE_H
+
+/*
+ * per-architecture load balancing logic, e.g. for hyperthreading
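+ *
+ * An architecture that defines ARCH_HAS_SMP_BALANCE in its
+ * asm/processor.h must provide arch_load_balance(cpu, idle) in
+ * asm/smp_balance.h.  A nonzero return value tells the generic
+ * balancer in kernel/sched.c to skip its own migration logic.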
+ */
+
+#ifdef ARCH_HAS_SMP_BALANCE
+#include <asm/smp_balance.h>
+#else
+#define arch_load_balance(x, y)		(0)
+#endif
+
+#endif /* _LINUX_SMP_BALANCE_H */
diff -urN linux-2.4.18/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.18/kernel/sched.c	Tue Aug 20 17:21:19 2002
+++ linux/kernel/sched.c	Sat Aug 24 23:17:16 2002
@@ -487,6 +487,14 @@
 	list_t *head, *curr;
 
 	/*
+	 * Handle architecture-specific balancing, such as hyperthreading.
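+	 * A nonzero return value means the arch hook has handled this
+	 * invocation, so the generic balancing pass below is skipped.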
+	 */
+	if (arch_load_balance(this_cpu, idle))
+		return;
+
+	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
 	 * we re-check the 'best' source CPU later on again, with
@@ -621,6 +629,8 @@
 	spin_unlock(&busiest->lock);
 }
 
+#include <linux/smp_balance.h>
+
 /*
  * One of the idle_cpu_tick() or the busy_cpu_tick() function will
  * gets called every timer tick, on every CPU. Our balancing action
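
A porting note: hooking another architecture into this takes two small
pieces.  As a minimal sketch (with a placeholder "foo" architecture that
is not part of this patch), define ARCH_HAS_SMP_BALANCE in
include/asm-foo/processor.h and provide include/asm-foo/smp_balance.h
along these lines:

	#ifndef _ASM_FOO_SMP_BALANCE_H
	#define _ASM_FOO_SMP_BALANCE_H

	/*
	 * Return nonzero when the architecture has balanced the load
	 * itself and the generic code in kernel/sched.c should back
	 * off for this invocation; always returning 0 keeps the
	 * default behaviour.
	 */
	static inline int arch_load_balance(int this_cpu, int idle)
	{
		return 0;
	}

	#endif /* _ASM_FOO_SMP_BALANCE_H */

Since asm-i386/smp_balance.h is included from the middle of
kernel/sched.c, such a header is free to use sched.c internals like
cpu_rq() and resched_task(), the way the i386 version above does.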
