Index: oldkernel/linux/drivers/block/xor.c
diff -u linux/drivers/block/xor.c:1.1 linux/drivers/block/xor.c:1.2
--- linux/drivers/block/xor.c:1.1	Thu Jun  1 14:53:31 2000
+++ linux/drivers/block/xor.c	Thu Jun  1 15:06:21 2000
@@ -22,6 +22,10 @@
 #include <asm/asi.h>
 #include <asm/visasm.h>
 #endif
+#ifdef __i386__
+#include <asm/processor.h>
+#include <asm/i387.h>
+#endif
 
 /*
  * we use the 'XOR function template' to register multiple xor
@@ -66,7 +70,7 @@
 
 #ifdef __i386__
 
-#ifdef CONFIG_X86_XMM
+#ifdef CONFIG_X86_CPU_OPTIMIZATIONS
 /*
  * Cache avoiding checksumming functions utilizing KNI instructions
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
@@ -74,21 +78,13 @@
 
 XORBLOCK_TEMPLATE(pIII_kni)
 {
-	char xmm_save[16*4];
-	int cr0;
-        int lines = (bh_ptr[0]->b_size>>8);
-
-	__asm__ __volatile__ ( 
-		"movl %%cr0,%0		;\n\t"
-		"clts			;\n\t"
-		"movups %%xmm0,(%1)	;\n\t"
-		"movups %%xmm1,0x10(%1)	;\n\t"
-		"movups %%xmm2,0x20(%1)	;\n\t"
-		"movups %%xmm3,0x30(%1)	;\n\t"
-		: "=r" (cr0)
-		: "r" (xmm_save) 
-		: "memory" );
+	char xmm_space[64];
+	int lines = (bh_ptr[0]->b_size>>8);
+	int recursive = 0;
+	unsigned long flags;
 
+	kernel_take_fpu_kni(recursive,&xmm_space[0],NULL,flags);
+
 #define OFFS(x) "8*("#x"*2)"
 #define	PF0(x) \
 	"	prefetcht0  "OFFS(x)"(%1)   ;\n"
@@ -157,7 +153,7 @@
         "       jnz 1b                  ;\n"
 
         		:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
         		  "r" (bh_ptr[1]->b_data)
 		        : "memory" );
@@ -207,7 +203,7 @@
         "       decl %0                 ;\n"
         "       jnz 1b                  ;\n"
         		:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
         		  "r" (bh_ptr[1]->b_data),
 			  "r" (bh_ptr[2]->b_data)
@@ -266,7 +262,7 @@
         "       jnz 1b                  ;\n"
 
         		:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
         		  "r" (bh_ptr[1]->b_data),
 			  "r" (bh_ptr[2]->b_data),
@@ -333,7 +329,7 @@
         "       jnz 1b                  ;\n"
 
         		:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
         		  "r" (bh_ptr[1]->b_data),
 			  "r" (bh_ptr[2]->b_data),
@@ -343,16 +339,7 @@
 			break;
 	}
 
-	__asm__ __volatile__ ( 
-		"sfence			;\n\t"
-		"movups (%1),%%xmm0	;\n\t"
-		"movups 0x10(%1),%%xmm1	;\n\t"
-		"movups 0x20(%1),%%xmm2	;\n\t"
-		"movups 0x30(%1),%%xmm3	;\n\t"
-		"movl 	%0,%%cr0	;\n\t"
-		:
-		: "r" (cr0), "r" (xmm_save)
-		: "memory" );
+	kernel_release_fpu_kni(recursive,&xmm_space[0],flags);
 }
 
 #undef OFFS
@@ -371,7 +358,7 @@
 #undef XO5
 #undef BLOCK
 
-#endif /* CONFIG_X86_XMM */
+#endif /* CONFIG_X86_CPU_OPTIMIZATIONS */
 
 /*
  * high-speed RAID5 checksumming functions utilizing MMX instructions
@@ -379,13 +366,12 @@
  */
 XORBLOCK_TEMPLATE(pII_mmx)
 {
-	char fpu_save[108];
         int lines = (bh_ptr[0]->b_size>>7);
-
-	if (!(current->flags & PF_USEDFPU))
-		__asm__ __volatile__ ( " clts;\n");
+	char mmx_space[32];
+	int recursive = 0;
+	unsigned long flags;
 
-	__asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) );
+	kernel_take_fpu_mmx(recursive,&mmx_space[0],NULL,flags);
 
 #define LD(x,y) \
         "       movq   8*("#x")(%1), %%mm"#y"   ;\n"
@@ -431,7 +417,7 @@
 		        "       decl %0               ;\n"
 		        "       jnz 1b                ;\n"
 	        	:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
 			  "r" (bh_ptr[1]->b_data)
 			: "memory");
@@ -471,7 +457,7 @@
 		        "       decl %0               ;\n"
 		        "       jnz 1b                ;\n"
 	        	:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
 			  "r" (bh_ptr[1]->b_data),
 			  "r" (bh_ptr[2]->b_data)
@@ -517,7 +503,7 @@
 		        "       decl %0               ;\n"
 		        "       jnz 1b                ;\n"
 	        	:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
 			  "r" (bh_ptr[1]->b_data),
 			  "r" (bh_ptr[2]->b_data),
@@ -569,7 +555,7 @@
 		        "       decl %0               ;\n"
 		        "       jnz 1b                ;\n"
 	        	:
-			: "r" (lines),
+			: "m" (lines),
 			  "r" (bh_ptr[0]->b_data),
 			  "r" (bh_ptr[1]->b_data),
 			  "r" (bh_ptr[2]->b_data),
@@ -579,10 +565,7 @@
 			break;
 	}
 
-	__asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) );
-
-	if (!(current->flags & PF_USEDFPU))
-		stts();
+	kernel_release_fpu_mmx(recursive,&mmx_space[0],flags);
 }
 
 #undef LD
@@ -595,13 +578,12 @@
 
 XORBLOCK_TEMPLATE(p5_mmx)
 {
-	char fpu_save[108];
         int lines = (bh_ptr[0]->b_size>>6);
-
-	if (!(current->flags & PF_USEDFPU))
-		__asm__ __volatile__ ( " clts;\n");
+	char mmx_space[32];
+	int recursive = 0;
+	unsigned long flags;
 
-	__asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) );
+	kernel_take_fpu_mmx(recursive,&mmx_space[0],NULL,flags);
 
 	switch(count) {
 		case 2:
@@ -618,21 +600,21 @@
 			        "       movq 24(%1), %%mm3   ;\n"
 			        "       movq %%mm1,  8(%1)   ;\n"
 			        "       pxor 16(%2), %%mm2   ;\n"
-			        "       movq 32(%1), %%mm4   ;\n"
+			        "       movq 32(%1), %%mm0   ;\n"
 			        "       movq %%mm2, 16(%1)   ;\n"
 			        "       pxor 24(%2), %%mm3   ;\n"
-			        "       movq 40(%1), %%mm5   ;\n"
+			        "       movq 40(%1), %%mm1   ;\n"
 			        "       movq %%mm3, 24(%1)   ;\n"
-			        "       pxor 32(%2), %%mm4   ;\n"
-			        "       movq 48(%1), %%mm6   ;\n"
-			        "       movq %%mm4, 32(%1)   ;\n"
-			        "       pxor 40(%2), %%mm5   ;\n"
-			        "       movq 56(%1), %%mm7   ;\n"
-			        "       movq %%mm5, 40(%1)   ;\n"
-			        "       pxor 48(%2), %%mm6   ;\n"
-			        "       pxor 56(%2), %%mm7   ;\n"
-			        "       movq %%mm6, 48(%1)   ;\n"
-			        "       movq %%mm7, 56(%1)   ;\n"
+			        "       pxor 32(%2), %%mm0   ;\n"
+			        "       movq 48(%1), %%mm2   ;\n"
+			        "       movq %%mm0, 32(%1)   ;\n"
+			        "       pxor 40(%2), %%mm1   ;\n"
+			        "       movq 56(%1), %%mm3   ;\n"
+			        "       movq %%mm1, 40(%1)   ;\n"
+			        "       pxor 48(%2), %%mm2   ;\n"
+			        "       pxor 56(%2), %%mm3   ;\n"
+			        "       movq %%mm2, 48(%1)   ;\n"
+			        "       movq %%mm3, 56(%1)   ;\n"
         
 			        "       addl $64, %1         ;\n"
 			        "       addl $64, %2         ;\n"
@@ -640,7 +622,7 @@
 			        "       jnz 1b               ;\n"
 
 			        : 
-			        : "r" (lines),
+			        : "m" (lines),
 				  "r" (bh_ptr[0]->b_data),
 				  "r" (bh_ptr[1]->b_data)
 			        : "memory" );
@@ -662,26 +644,26 @@
 			        "       pxor 16(%3), %%mm2   ;\n"
 			        "       movq 24(%1), %%mm3   ;\n"
 			        "       movq %%mm1,  8(%1)   ;\n"
-			        "       movq 32(%1), %%mm4   ;\n"
-			        "       movq 40(%1), %%mm5   ;\n"
+			        "       movq 32(%1), %%mm0   ;\n"
+			        "       movq 40(%1), %%mm1   ;\n"
 			        "       pxor 24(%2), %%mm3   ;\n"
 			        "       movq %%mm2, 16(%1)   ;\n"
-			        "       pxor 32(%2), %%mm4   ;\n"
+			        "       pxor 32(%2), %%mm0   ;\n"
 			        "       pxor 24(%3), %%mm3   ;\n"
-			        "       pxor 40(%2), %%mm5   ;\n"
+			        "       pxor 40(%2), %%mm1   ;\n"
 			        "       movq %%mm3, 24(%1)   ;\n"
-			        "       pxor 32(%3), %%mm4   ;\n"
-			        "       pxor 40(%3), %%mm5   ;\n"
-			        "       movq 48(%1), %%mm6   ;\n"
-			        "       movq %%mm4, 32(%1)   ;\n"
-			        "       movq 56(%1), %%mm7   ;\n"
-			        "       pxor 48(%2), %%mm6   ;\n"
-			        "       movq %%mm5, 40(%1)   ;\n"
-			        "       pxor 56(%2), %%mm7   ;\n"
-			        "       pxor 48(%3), %%mm6   ;\n"
-			        "       pxor 56(%3), %%mm7   ;\n"
-			        "       movq %%mm6, 48(%1)   ;\n"
-			        "       movq %%mm7, 56(%1)   ;\n"
+			        "       pxor 32(%3), %%mm0   ;\n"
+			        "       pxor 40(%3), %%mm1   ;\n"
+			        "       movq 48(%1), %%mm2   ;\n"
+			        "       movq %%mm0, 32(%1)   ;\n"
+			        "       movq 56(%1), %%mm3   ;\n"
+			        "       pxor 48(%2), %%mm2   ;\n"
+			        "       movq %%mm1, 40(%1)   ;\n"
+			        "       pxor 56(%2), %%mm3   ;\n"
+			        "       pxor 48(%3), %%mm2   ;\n"
+			        "       pxor 56(%3), %%mm3   ;\n"
+			        "       movq %%mm2, 48(%1)   ;\n"
+			        "       movq %%mm3, 56(%1)   ;\n"
         
 			        "       addl $64, %1         ;\n"
 			        "       addl $64, %2         ;\n"
@@ -690,7 +672,7 @@
 			        "       jnz 1b               ;\n"
 
 			        : 
-			        : "r" (lines),
+			        : "m" (lines),
 				  "r" (bh_ptr[0]->b_data),
 				  "r" (bh_ptr[1]->b_data),
 				  "r" (bh_ptr[2]->b_data)
@@ -714,33 +696,33 @@
 			        "       pxor 16(%3), %%mm2   ;\n"
 			        "       pxor  8(%4), %%mm1   ;\n"
 			        "       movq %%mm0,   (%1)   ;\n"
-			        "       movq 32(%1), %%mm4   ;\n"
+			        "       movq 32(%1), %%mm0   ;\n"
 			        "       pxor 24(%2), %%mm3   ;\n"
 			        "       pxor 16(%4), %%mm2   ;\n"
 			        "       movq %%mm1,  8(%1)   ;\n"
-			        "       movq 40(%1), %%mm5   ;\n"
-			        "       pxor 32(%2), %%mm4   ;\n"
+			        "       movq 40(%1), %%mm1   ;\n"
+			        "       pxor 32(%2), %%mm0   ;\n"
 			        "       pxor 24(%3), %%mm3   ;\n"
 			        "       movq %%mm2, 16(%1)   ;\n"
-			        "       pxor 40(%2), %%mm5   ;\n"
-			        "       pxor 32(%3), %%mm4   ;\n"
+			        "       pxor 40(%2), %%mm1   ;\n"
+			        "       pxor 32(%3), %%mm0   ;\n"
 			        "       pxor 24(%4), %%mm3   ;\n"
 			        "       movq %%mm3, 24(%1)   ;\n"
-			        "       movq 56(%1), %%mm7   ;\n"
-			        "       movq 48(%1), %%mm6   ;\n"
-			        "       pxor 40(%3), %%mm5   ;\n"
-			        "       pxor 32(%4), %%mm4   ;\n"
-			        "       pxor 48(%2), %%mm6   ;\n"
-			        "       movq %%mm4, 32(%1)   ;\n"
-			        "       pxor 56(%2), %%mm7   ;\n"
-			        "       pxor 40(%4), %%mm5   ;\n"
-			        "       pxor 48(%3), %%mm6   ;\n"
-			        "       pxor 56(%3), %%mm7   ;\n"
-			        "       movq %%mm5, 40(%1)   ;\n"
-			        "       pxor 48(%4), %%mm6   ;\n"
-			        "       pxor 56(%4), %%mm7   ;\n"
-			        "       movq %%mm6, 48(%1)   ;\n"
-			        "       movq %%mm7, 56(%1)   ;\n"
+			        "       movq 56(%1), %%mm3   ;\n"
+			        "       movq 48(%1), %%mm2   ;\n"
+			        "       pxor 40(%3), %%mm1   ;\n"
+			        "       pxor 32(%4), %%mm0   ;\n"
+			        "       pxor 48(%2), %%mm2   ;\n"
+			        "       movq %%mm0, 32(%1)   ;\n"
+			        "       pxor 56(%2), %%mm3   ;\n"
+			        "       pxor 40(%4), %%mm1   ;\n"
+			        "       pxor 48(%3), %%mm2   ;\n"
+			        "       pxor 56(%3), %%mm3   ;\n"
+			        "       movq %%mm1, 40(%1)   ;\n"
+			        "       pxor 48(%4), %%mm2   ;\n"
+			        "       pxor 56(%4), %%mm3   ;\n"
+			        "       movq %%mm2, 48(%1)   ;\n"
+			        "       movq %%mm3, 56(%1)   ;\n"
         
 			        "       addl $64, %1         ;\n"
 			        "       addl $64, %2         ;\n"
@@ -750,7 +732,7 @@
 			        "       jnz 1b               ;\n"
 
 			        : 
-			        : "r" (lines),
+			        : "m" (lines),
 				  "r" (bh_ptr[0]->b_data),
 				  "r" (bh_ptr[1]->b_data),
 				  "r" (bh_ptr[2]->b_data),
@@ -782,34 +764,34 @@
 			        "       movq %%mm1,  8(%1)   ;\n"
 			        "       pxor 16(%5), %%mm2   ;\n"
 			        "       pxor 24(%3), %%mm3   ;\n"
-			        "       movq 32(%1), %%mm4   ;\n"
+			        "       movq 32(%1), %%mm0   ;\n"
 			        "       movq %%mm2, 16(%1)   ;\n"
 			        "       pxor 24(%4), %%mm3   ;\n"
-			        "       pxor 32(%2), %%mm4   ;\n"
-			        "       movq 40(%1), %%mm5   ;\n"
+			        "       pxor 32(%2), %%mm0   ;\n"
+			        "       movq 40(%1), %%mm1   ;\n"
 			        "       pxor 24(%5), %%mm3   ;\n"
-			        "       pxor 32(%3), %%mm4   ;\n"
-			        "       pxor 40(%2), %%mm5   ;\n"
+			        "       pxor 32(%3), %%mm0   ;\n"
+			        "       pxor 40(%2), %%mm1   ;\n"
 			        "       movq %%mm3, 24(%1)   ;\n"
-			        "       pxor 32(%4), %%mm4   ;\n"
-			        "       pxor 40(%3), %%mm5   ;\n"
-			        "       movq 48(%1), %%mm6   ;\n"
-			        "       movq 56(%1), %%mm7   ;\n"
-			        "       pxor 32(%5), %%mm4   ;\n"
-			        "       pxor 40(%4), %%mm5   ;\n"
-			        "       pxor 48(%2), %%mm6   ;\n"
-			        "       pxor 56(%2), %%mm7   ;\n"
-			        "       movq %%mm4, 32(%1)   ;\n"
-			        "       pxor 48(%3), %%mm6   ;\n"
-			        "       pxor 56(%3), %%mm7   ;\n"
-			        "       pxor 40(%5), %%mm5   ;\n"
-			        "       pxor 48(%4), %%mm6   ;\n"
-			        "       pxor 56(%4), %%mm7   ;\n"
-			        "       movq %%mm5, 40(%1)   ;\n"
-			        "       pxor 48(%5), %%mm6   ;\n"
-			        "       pxor 56(%5), %%mm7   ;\n"
-			        "       movq %%mm6, 48(%1)   ;\n"
-			        "       movq %%mm7, 56(%1)   ;\n"
+			        "       pxor 32(%4), %%mm0   ;\n"
+			        "       pxor 40(%3), %%mm1   ;\n"
+			        "       movq 48(%1), %%mm2   ;\n"
+			        "       movq 56(%1), %%mm3   ;\n"
+			        "       pxor 32(%5), %%mm0   ;\n"
+			        "       pxor 40(%4), %%mm1   ;\n"
+			        "       pxor 48(%2), %%mm2   ;\n"
+			        "       pxor 56(%2), %%mm3   ;\n"
+			        "       movq %%mm0, 32(%1)   ;\n"
+			        "       pxor 48(%3), %%mm2   ;\n"
+			        "       pxor 56(%3), %%mm3   ;\n"
+			        "       pxor 40(%5), %%mm1   ;\n"
+			        "       pxor 48(%4), %%mm2   ;\n"
+			        "       pxor 56(%4), %%mm3   ;\n"
+			        "       movq %%mm1, 40(%1)   ;\n"
+			        "       pxor 48(%5), %%mm2   ;\n"
+			        "       pxor 56(%5), %%mm3   ;\n"
+			        "       movq %%mm2, 48(%1)   ;\n"
+			        "       movq %%mm3, 56(%1)   ;\n"
         
 			        "       addl $64, %1         ;\n"
 			        "       addl $64, %2         ;\n"
@@ -820,7 +802,7 @@
 			        "       jnz 1b               ;\n"
 
 			        : 
-			        : "r" (lines),
+			        : "m" (lines),
 				  "r" (bh_ptr[0]->b_data),
 				  "r" (bh_ptr[1]->b_data),
 				  "r" (bh_ptr[2]->b_data),
@@ -830,10 +812,7 @@
 			break;
 	}
 
-	__asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) );
-
-	if (!(current->flags & PF_USEDFPU))
-		stts();
+	kernel_release_fpu_mmx(recursive,&mmx_space[0],flags);
 }
 #endif /* __i386__ */
 #endif /* !__sparc_v9__ */
@@ -1811,11 +1790,12 @@
 		if (f->speed > fastest->speed)
 			fastest = f;
 	}
-#ifdef CONFIG_X86_XMM 
-	if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) {
+#ifdef CONFIG_X86_CPU_OPTIMIZATIONS
+	if ( (boot_cpu_data.mmu_cr4_features & X86_CR4_OSFXSR) &&
+	     (boot_cpu_data.x86_capability & X86_FEATURE_XMM) ) {
 		fastest = &t_xor_block_pIII_kni;
 	}
-#endif
+#endif /* CONFIG_X86_CPU_OPTIMIZATIONS */
 	xor_block = fastest->xor_block;
 	printk( "using fastest function: %s (%d.%03d MB/sec)\n", fastest->name,
 		fastest->speed / 1000, fastest->speed % 1000);
@@ -1847,8 +1827,9 @@
 	xor_speed(&t_xor_block_SPARC,&b1,&b2);
 #endif
 
-#ifdef CONFIG_X86_XMM 
-	if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) {
+#ifdef CONFIG_X86_CPU_OPTIMIZATIONS
+	if ( (boot_cpu_data.mmu_cr4_features & X86_CR4_OSFXSR) &&
+	     (boot_cpu_data.x86_capability & X86_FEATURE_XMM) ) {
 		printk(KERN_INFO
 			"raid5: KNI detected, trying cache-avoiding KNI checksum routine\n");
 		/* we force the use of the KNI xor block because it
@@ -1859,7 +1840,7 @@
 		*/
 		xor_speed(&t_xor_block_pIII_kni,&b1,&b2);
 	}
-#endif /* CONFIG_X86_XMM */
+#endif /* CONFIG_X86_CPU_OPTIMIZATIONS */
 
 #ifdef __i386__
 
