[PATCH] x86_64: Undo the earlier changes to remove unrolled copy/memset functions
They cause quite bad performance regressions on Netburst
This is temporary until we can get new optimized functions
for these CPUs.
This undoes changes that were done in 2.6.15 and in 2.6.16-rc1,
essentially bringing the code back to 2.6.14 level. Only change
is I renamed the X86_FEATURE_K8_C flag to X86_FEATURE_REP_GOOD
and fixed the check for the flag and also fixed some comments.
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 43d9fa1..1f81b79 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -5,8 +5,46 @@
.globl clear_page
.p2align 4
clear_page:
+ xorl %eax,%eax
+ movl $4096/64,%ecx
+ .p2align 4
+.Lloop:
+ decl %ecx
+#define PUT(x) movq %rax,x*8(%rdi)
+ movq %rax,(%rdi)
+ PUT(1)
+ PUT(2)
+ PUT(3)
+ PUT(4)
+ PUT(5)
+ PUT(6)
+ PUT(7)
+ leaq 64(%rdi),%rdi
+ jnz .Lloop
+ nop
+ ret
+clear_page_end:
+
+ /* Some CPUs run faster using the string instructions.
+ It is also a lot simpler. Use this when possible */
+
+#include <asm/cpufeature.h>
+
+ .section .altinstructions,"a"
+ .align 8
+ .quad clear_page
+ .quad clear_page_c
+ .byte X86_FEATURE_REP_GOOD
+ .byte clear_page_end-clear_page
+ .byte clear_page_c_end-clear_page_c
+ .previous
+
+ .section .altinstr_replacement,"ax"
+clear_page_c:
movl $4096/8,%ecx
xorl %eax,%eax
rep
stosq
ret
+clear_page_c_end:
+ .previous