arm64: efi: Fix _start returns from failed _relocate

If _relocate fails we need to restore the frame pointer and the link
register and return from _start. But we've pushed x0 and x1 below
the fp and lr, so, as the code was, we'd restore the wrong values.
Revert parts of the code back to the way they are in gnu-efi and move
the stack alignment below the loading of x0 and x1, after we've
confirmed _relocate didn't fail.

Fixes: d231b539a41f ("arm64: Use code from the gnu-efi when booting with EFI")
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Signed-off-by: Andrew Jones <andrew.jones@linux.dev>
diff --git a/arm/efi/crt0-efi-aarch64.S b/arm/efi/crt0-efi-aarch64.S
index 5d0dc04..5fd3dc9 100644
--- a/arm/efi/crt0-efi-aarch64.S
+++ b/arm/efi/crt0-efi-aarch64.S
@@ -111,17 +111,10 @@
 
 	.align		12
 _start:
-	stp		x29, x30, [sp, #-16]!
-
-	/* Align sp; this is necessary due to way we store cpu0's thread_info */
+	stp		x29, x30, [sp, #-32]!
 	mov		x29, sp
-	mov		x30, sp
-	and		x30, x30, #THREAD_MASK
-	mov		sp, x30
-	str		x29, [sp, #-16]!
 
-	stp		x0, x1, [sp, #-16]!
-
+	stp		x0, x1, [sp, #16]
 	mov		x2, x0
 	mov		x3, x1
 	adr		x0, ImageBase
@@ -130,12 +123,20 @@
 	bl		_relocate
 	cbnz		x0, 0f
 
-	ldp		x0, x1, [sp], #16
+	ldp		x0, x1, [sp, #16]
+
+	/* Align sp; this is necessary due to the way we store cpu0's thread_info */
+	mov		x29, sp
+	mov		x30, sp
+	and		x30, x30, #THREAD_MASK
+	mov		sp, x30
+	str		x29, [sp, #-16]!
+
 	bl		efi_main
 
 	/* Restore sp */
 	ldr		x30, [sp], #16
-	mov             sp, x30
+	mov		sp, x30
 
-0:	ldp		x29, x30, [sp], #16
+0:	ldp		x29, x30, [sp], #32
 	ret