diff --git a/cpu/arm_cortexa8/cpu.c b/cpu/arm_cortexa8/cpu.c
index 5a5981e4054fada5a3aff183e18efed8844f7537..a01e0d605ff8fc6397687cc6d7c8de9508143b21 100644
--- a/cpu/arm_cortexa8/cpu.c
+++ b/cpu/arm_cortexa8/cpu.c
@@ -64,7 +64,7 @@ int cleanup_before_linux(void)
 	/* turn off L2 cache */
 	l2_cache_disable();
 	/* invalidate L2 cache also */
-	v7_flush_dcache_all(get_device_type());
+	invalidate_dcache(get_device_type());
 #endif
 	i = 0;
 	/* mem barrier to sync up things */
diff --git a/cpu/arm_cortexa8/omap3/Makefile b/cpu/arm_cortexa8/omap3/Makefile
index eef165c3341f3ee6ff7d3a60195ad3d07cd0c18b..136b163ad7549609907621d1bc21f53498f778e4 100644
--- a/cpu/arm_cortexa8/omap3/Makefile
+++ b/cpu/arm_cortexa8/omap3/Makefile
@@ -26,10 +26,10 @@ include $(TOPDIR)/config.mk
 LIB	=  $(obj)lib$(SOC).a
 
 SOBJS	:= lowlevel_init.o
+SOBJS	+= cache.o
 SOBJS	+= reset.o
 
 COBJS	+= board.o
-COBJS	+= cache.o
 COBJS	+= clock.o
 COBJS	+= gpio.o
 COBJS	+= mem.o
diff --git a/cpu/arm_cortexa8/omap3/board.c b/cpu/arm_cortexa8/omap3/board.c
index b8bd0524af9b527b689437293a0ea38fc7ae468f..dd2c94073b3805494cdc1a476c23c8ec62075fee 100644
--- a/cpu/arm_cortexa8/omap3/board.c
+++ b/cpu/arm_cortexa8/omap3/board.c
@@ -201,7 +201,7 @@ void s_init(void)
 	 * Right now flushing at low MPU speed.
 	 * Need to move after clock init
 	 */
-	v7_flush_dcache_all(get_device_type());
+	invalidate_dcache(get_device_type());
 #ifndef CONFIG_ICACHE_OFF
 	icache_enable();
 #endif
diff --git a/cpu/arm_cortexa8/omap3/cache.S b/cpu/arm_cortexa8/omap3/cache.S
new file mode 100644
index 0000000000000000000000000000000000000000..0f63815359e40c3e31282355d6a72f459205c639
--- /dev/null
+++ b/cpu/arm_cortexa8/omap3/cache.S
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2009 Wind River Systems, Inc.
+ * Tom Rix <Tom.Rix@windriver.com>
+ *
+ * This file is based on and replaces the existing cache.c file.
+ * The copyrights for the cache.c file are:
+ *
+ * (C) Copyright 2008 Texas Instruments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm/arch/omap3.h>
+
+/*
+ * omap3 cache code
+ */
+
+.align 5
+.global invalidate_dcache
+.global l2_cache_enable
+.global l2_cache_disable
+
+/*
+ *	invalidate_dcache()
+ *
+ *	Invalidate the whole D-cache.
+ *
+ *	Uses r0-r5, r7, r9-r12; all are saved and restored on the stack.
+ *
+ *	- r0	- device type from get_device_type(); 0x3 means a GP device
+ */
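+@ On GP devices (type 0x3) the L2 cache is invalidated through the ROM code
+@ SMI service invoked below and the set/way loop is skipped; on all other
+@ device types the loop walks every data/unified cache level reported in the
+@ CLIDR and invalidates it by set/way.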
+invalidate_dcache:
+	stmfd	r13!, {r0 - r5, r7, r9 - r12, r14}
+
+	mov	r7, r0				@ take a backup of device type
+	cmp	r0, #0x3			@ check if the device type is
+						@ GP
+	moveq r12, #0x1				@ set up to invalidate L2
+smi:	.word 0x01600070			@ Call SMI monitor (smieq)
+	cmp	r7, #0x3			@ compare again in case it's
+						@ lost
+	beq	finished_inval			@ if GP device, inval done
+						@ above
+
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	ands	r3, r0, #0x7000000		@ extract loc from clidr
+	mov	r3, r3, lsr #23			@ left align loc bit field
+	beq	finished_inval			@ if loc is 0, then no need to
+						@ clean
+	mov	r10, #0				@ start clean at cache level 0
+inval_loop1:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache
+						@ level
+	mov	r1, r0, lsr r2			@ extract cache type bits from
+						@ clidr
+	and	r1, r1, #7			@ mask off the bits for the
+						@ current cache only
+	cmp	r1, #2				@ see what cache we have at
+						@ this level
+	blt	skip_inval			@ skip if no cache, or just
+						@ i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
+						@ in cssr
+	mov	r2, #0				@ operand for mcr SBZ
+	mcr	p15, 0, r2, c7, c5, 4		@ flush prefetch buffer to
+						@ sync the new cssr & csidr,
+						@ with armv7 this is 'isb',
+						@ but we compile with armv5
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the
+						@ cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	ldr	r4, =0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number of the
+						@ way size
+	clz	r5, r4				@ find bit position of way
+						@ size increment
+	ldr	r7, =0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the
+						@ index size
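+	@ The loops below build the DCISW operand as
+	@ (way << r5) | (set << r2) | (level << 1): r5 = 32 - log2(ways) comes
+	@ from the clz above and r2 = log2(line length in bytes), so each mcr
+	@ invalidates one set/way entry of the level selected in cssr.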
+inval_loop2:
+	mov	r9, r4				@ create working copy of max
+						@ way size
+inval_loop3:
+	orr	r11, r10, r9, lsl r5		@ factor way and cache number
+						@ into r11
+	orr	r11, r11, r7, lsl r2		@ factor index number into r11
+	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
+	subs	r9, r9, #1			@ decrement the way
+	bge	inval_loop3
+	subs	r7, r7, #1			@ decrement the index
+	bge	inval_loop2
+skip_inval:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	inval_loop1
+finished_inval:
+	mov	r10, #0				@ switch back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
+						@ in cssr
+	mcr	p15, 0, r10, c7, c5, 4		@ flush prefetch buffer,
+						@ with armv7 this is 'isb',
+						@ but we compile with armv5
+
+	ldmfd	r13!, {r0 - r5, r7, r9 - r12, pc}
+
+
+l2_cache_enable:
+	push	{r0, r1, r2, lr}
+	@ ES2 onwards we can disable/enable L2 ourselves
+	bl	get_cpu_rev
+	cmp	r0, #CPU_3XX_ES20
+	blt	l2_cache_enable_EARLIER_THAN_ES2
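+	@ ES2 and later: bit 1 of the Cortex-A8 Auxiliary Control Register is
+	@ the L2EN bit, so it can simply be set here.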
+	mrc	15, 0, r3, cr1, cr0, 1
+	orr	r3, r3, #2
+	mcr	15, 0, r3, cr1, cr0, 1
+	b	l2_cache_enable_END
+l2_cache_enable_EARLIER_THAN_ES2:
+	@ Save r0, r12 and restore them after usage
+	mov	r3, ip
+	str	r3, [sp, #4]
+	mov	r3, r0
+	@
+	@ GP Device ROM code API usage here
+	@ r12 = AUXCR Write function and r0 value
+	@
+	mov	ip, #3
+	mrc	15, 0, r0, cr1, cr0, 1
+	orr	r0, r0, #2
+	@ SMI instruction to call ROM Code API
+	.word	0xe1600070
+	mov	r0, r3				@ restore r0
+	ldr	r3, [sp, #4]			@ reload saved r12
+	mov	ip, r3				@ restore r12
+l2_cache_enable_END:
+	pop	{r1, r2, r3, pc}
+
+
+l2_cache_disable:
+	push	{r0, r1, r2, lr}
+	@ ES2 onwards we can disable/enable L2 ourselves
+	bl	get_cpu_rev
+	cmp	r0, #CPU_3XX_ES20
+	blt	l2_cache_disable_EARLIER_THAN_ES2
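+	@ ES2 and later: clearing bit 1 (L2EN) of the Cortex-A8 Auxiliary
+	@ Control Register turns the L2 cache off directly.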
+	mrc	15, 0, r3, cr1, cr0, 1
+	bic	r3, r3, #2
+	mcr	15, 0, r3, cr1, cr0, 1
+	b	l2_cache_disable_END
+l2_cache_disable_EARLIER_THAN_ES2:
+	@ Save r0, r12 and restore them after usage
+	mov	r3, ip
+	str	r3, [sp, #4]
+	mov	r3, r0
+	@
+	@ GP Device ROM code API usage here
+	@ r12 = AUXCR Write function and r0 value
+	@
+	mov	ip, #3
+	mrc	15, 0, r0, cr1, cr0, 1
+	bic	r0, r0, #2
+	@ SMI instruction to call ROM Code API
+	.word	0xe1600070
+	mov	r0, r3				@ restore r0
+	ldr	r3, [sp, #4]			@ reload saved r12
+	mov	ip, r3				@ restore r12
+l2_cache_disable_END:
+	pop	{r1, r2, r3, pc}
diff --git a/cpu/arm_cortexa8/omap3/cache.c b/cpu/arm_cortexa8/omap3/cache.c
deleted file mode 100644
index 0d5b444be1d26c22e6ee3d8b7323cd81e0a41588..0000000000000000000000000000000000000000
--- a/cpu/arm_cortexa8/omap3/cache.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * (C) Copyright 2008 Texas Insturments
- *
- * (C) Copyright 2002
- * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
- * Marius Groeger <mgroeger@sysgo.de>
- *
- * (C) Copyright 2002
- * Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- */
-
-/*
- * omap3 L2 cache code
- */
-
-#include <common.h>
-#include <asm/arch/sys_proto.h>
-#include <asm/cache.h>
-
-void l2_cache_enable(void)
-{
-	unsigned long i;
-	volatile unsigned int j;
-
-	/* ES2 onwards we can disable/enable L2 ourselves */
-	if (get_cpu_rev() >= CPU_3XX_ES20) {
-		__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i));
-		__asm__ __volatile__("orr %0, %0, #0x2":"=r"(i));
-		__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i));
-	} else {
-		/* Save r0, r12 and restore them after usage */
-		__asm__ __volatile__("mov %0, r12":"=r"(j));
-		__asm__ __volatile__("mov %0, r0":"=r"(i));
-
-		/*
-		 * GP Device ROM code API usage here
-		 * r12 = AUXCR Write function and r0 value
-		 */
-		__asm__ __volatile__("mov r12, #0x3");
-		__asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1");
-		__asm__ __volatile__("orr r0, r0, #0x2");
-		/* SMI instruction to call ROM Code API */
-		__asm__ __volatile__(".word 0xE1600070");
-		__asm__ __volatile__("mov r0, %0":"=r"(i));
-		__asm__ __volatile__("mov r12, %0":"=r"(j));
-	}
-
-}
-
-void l2_cache_disable(void)
-{
-	unsigned long i;
-	volatile unsigned int j;
-
-	/* ES2 onwards we can disable/enable L2 ourselves */
-	if (get_cpu_rev() >= CPU_3XX_ES20) {
-		__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i));
-		__asm__ __volatile__("bic %0, %0, #0x2":"=r"(i));
-		__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i));
-	} else {
-		/* Save r0, r12 and restore them after usage */
-		__asm__ __volatile__("mov %0, r12":"=r"(j));
-		__asm__ __volatile__("mov %0, r0":"=r"(i));
-
-		/*
-		 * GP Device ROM code API usage here
-		 * r12 = AUXCR Write function and r0 value
-		 */
-		__asm__ __volatile__("mov r12, #0x3");
-		__asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1");
-		__asm__ __volatile__("bic r0, r0, #0x2");
-		/* SMI instruction to call ROM Code API */
-		__asm__ __volatile__(".word 0xE1600070");
-		__asm__ __volatile__("mov r0, %0":"=r"(i));
-		__asm__ __volatile__("mov r12, %0":"=r"(j));
-	}
-}
diff --git a/cpu/arm_cortexa8/start.S b/cpu/arm_cortexa8/start.S
index 6bd65521bdf29248a8e6713fead9ea5e03cf1d9a..14a9bd3b039143f92aed2133493aa9ee6a175738 100644
--- a/cpu/arm_cortexa8/start.S
+++ b/cpu/arm_cortexa8/start.S
@@ -415,88 +415,3 @@ fiq:
 
 #endif
 
-/*
- *	v7_flush_dcache_all()
- *
- *	Flush the whole D-cache.
- *
- *	Corrupted registers: r0-r5, r7, r9-r11
- *
- *	- mm	- mm_struct describing address space
- */
-	.align 5
-.global v7_flush_dcache_all
-v7_flush_dcache_all:
-	stmfd	r13!, {r0 - r5, r7, r9 - r12, r14}
-
-	mov	r7, r0				@ take a backup of device type
-	cmp	r0, #0x3			@ check if the device type is
-						@ GP
-	moveq r12, #0x1				@ set up to invalide L2
-smi:	.word 0x01600070			@ Call SMI monitor (smieq)
-	cmp	r7, #0x3			@ compare again in case its
-						@ lost
-	beq	finished_inval			@ if GP device, inval done
-						@ above
-
-	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	ands	r3, r0, #0x7000000		@ extract loc from clidr
-	mov	r3, r3, lsr #23			@ left align loc bit field
-	beq	finished_inval			@ if loc is 0, then no need to
-						@ clean
-	mov	r10, #0				@ start clean at cache level 0
-inval_loop1:
-	add	r2, r10, r10, lsr #1		@ work out 3x current cache
-						@ level
-	mov	r1, r0, lsr r2			@ extract cache type bits from
-						@ clidr
-	and	r1, r1, #7			@ mask of the bits for current
-						@ cache only
-	cmp	r1, #2				@ see what cache we have at
-						@ this level
-	blt	skip_inval			@ skip if no cache, or just
-						@ i-cache
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
-						@ in cssr
-	mov	r2, #0				@ operand for mcr SBZ
-	mcr	p15, 0, r2, c7, c5, 4		@ flush prefetch buffer to
-						@ sych the new cssr&csidr,
-						@ with armv7 this is 'isb',
-						@ but we compile with armv5
-	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
-	and	r2, r1, #7			@ extract the length of the
-						@ cache lines
-	add	r2, r2, #4			@ add 4 (line length offset)
-	ldr	r4, =0x3ff
-	ands	r4, r4, r1, lsr #3		@ find maximum number on the
-						@ way size
-	clz	r5, r4				@ find bit position of way
-						@ size increment
-	ldr	r7, =0x7fff
-	ands	r7, r7, r1, lsr #13		@ extract max number of the
-						@ index size
-inval_loop2:
-	mov	r9, r4				@ create working copy of max
-						@ way size
-inval_loop3:
-	orr	r11, r10, r9, lsl r5		@ factor way and cache number
-						@ into r11
-	orr	r11, r11, r7, lsl r2		@ factor index number into r11
-	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
-	subs	r9, r9, #1			@ decrement the way
-	bge	inval_loop3
-	subs	r7, r7, #1			@ decrement the index
-	bge	inval_loop2
-skip_inval:
-	add	r10, r10, #2			@ increment cache number
-	cmp	r3, r10
-	bgt	inval_loop1
-finished_inval:
-	mov	r10, #0				@ swith back to cache level 0
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
-						@ in cssr
-	mcr	p15, 0, r10, c7, c5, 4		@ flush prefetch buffer,
-						@ with armv7 this is 'isb',
-						@ but we compile with armv5
-
-	ldmfd	r13!, {r0 - r5, r7, r9 - r12, pc}
diff --git a/include/asm-arm/arch-omap3/omap3.h b/include/asm-arm/arch-omap3/omap3.h
index 6459d992bd1c2431826c46eb44c9a10376c1ea03..12815f694f42bc828f6253fc964b1d58544b8b12 100644
--- a/include/asm-arm/arch-omap3/omap3.h
+++ b/include/asm-arm/arch-omap3/omap3.h
@@ -168,6 +168,8 @@ struct gpio {
  *  ES1     = rev 0
  *
  *  ES2 onwards, the value maps to contents of IDCODE register [31:28].
+ *
+ * Note: CPU_3XX_ES20 is used in cache.S.  Please review before changing.
  */
 #define CPU_3XX_ES10		0
 #define CPU_3XX_ES20		1
diff --git a/include/asm-arm/arch-omap3/sys_proto.h b/include/asm-arm/arch-omap3/sys_proto.h
index 7361d08961200995840b970000e8ab30e68e933c..2246f801e5f74116070c8738745b7f873e947325 100644
--- a/include/asm-arm/arch-omap3/sys_proto.h
+++ b/include/asm-arm/arch-omap3/sys_proto.h
@@ -55,7 +55,7 @@ void secureworld_exit(void);
 void setup_auxcr(void);
 void try_unlock_memory(void);
 u32 get_boot_type(void);
-void v7_flush_dcache_all(u32);
+void invalidate_dcache(u32);
 void sr32(void *, u32, u32, u32);
 u32 wait_on_value(u32, u32, void *, u32);
 void sdelay(unsigned long);