s390/kdump: add support for vector extension

With this patch for kdump the s390 vector registers are stored into the
prepared save areas in the old kernel and into the REGSET_VX_LOW and
REGSET_VX_HIGH ELF notes for /proc/vmcore in the new kernel.

The NT_S390_VXRS_LOW note contains the lower halves of the first 16 vector
registers 0-15. The higher halves are stored in the floating point register
ELF note.  The NT_S390_VXRS_HIGH contains the full vector registers 16-31.

The kernel provides a save area for storing vector register in case of
machine checks. A pointer to this save are is stored in the CPU lowcore
at offset 0x11b0. This save area is also used to save the registers for
kdump. In case of a dumped crashed kdump those areas are used to extract
the registers of the production system.

The vector registers for remote CPUs are stored using the "store additional
status at address" SIGP. For the dump CPU the vector registers are stored
with the VSTM instruction.

With this patch also zfcpdump stores the vector registers.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 27735ae..f6e43d3 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -226,6 +226,6 @@
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa);
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs);
 
 #endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index c81661e..ece606c 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -89,12 +89,12 @@
 extern u32 dump_prefix_page;
 
 struct dump_save_areas {
-	struct save_area **areas;
+	struct save_area_ext **areas;
 	int count;
 };
 
 extern struct dump_save_areas dump_save_areas;
-struct save_area *dump_save_area_create(int cpu);
+struct save_area_ext *dump_save_area_create(int cpu);
 
 extern void do_reipl(void);
 extern void do_halt(void);
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index d812cf1..6cc51fe 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <asm/ptrace.h>
 #include <asm/cpu.h>
+#include <asm/types.h>
 
 #ifdef CONFIG_32BIT
 
@@ -31,6 +32,11 @@
 	u32	ctrl_regs[16];
 } __packed;
 
+struct save_area_ext {
+	struct save_area	sa;
+	__vector128		vx_regs[32];
+};
+
 struct _lowcore {
 	psw_t	restart_psw;			/* 0x0000 */
 	psw_t	restart_old_psw;		/* 0x0008 */
@@ -183,6 +189,11 @@
 	u64	ctrl_regs[16];
 } __packed;
 
+struct save_area_ext {
+	struct save_area	sa;
+	__vector128		vx_regs[32];
+};
+
 struct _lowcore {
 	__u8	pad_0x0000[0x0014-0x0000];	/* 0x0000 */
 	__u32	ipl_parmblock_ptr;		/* 0x0014 */
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index bf9c823..4957611 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -15,6 +15,7 @@
 #define SIGP_SET_ARCHITECTURE	     18
 #define SIGP_COND_EMERGENCY_SIGNAL   19
 #define SIGP_SENSE_RUNNING	     21
+#define SIGP_STORE_ADDITIONAL_STATUS 23
 
 /* SIGP condition codes */
 #define SIGP_CC_ORDER_CODE_ACCEPTED 0
@@ -33,9 +34,10 @@
 
 #ifndef __ASSEMBLY__
 
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+			      u32 *status)
 {
-	register unsigned int reg1 asm ("1") = parm;
+	register unsigned long reg1 asm ("1") = parm;
 	int cc;
 
 	asm volatile(
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 0e01095..2542a7e 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -114,6 +114,19 @@
 		: "=Q" (*(addrtype *) vxrs) : : "1");
 }
 
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	save_vx_regs(vxrs);
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
 static inline void restore_vx_regs(__vector128 *vxrs)
 {
 	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index a3b9150..9f73c80 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -46,9 +46,9 @@
 /*
  * Allocate and add a save area for a CPU
  */
-struct save_area *dump_save_area_create(int cpu)
+struct save_area_ext *dump_save_area_create(int cpu)
 {
-	struct save_area **save_areas, *save_area;
+	struct save_area_ext **save_areas, *save_area;
 
 	save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
 	if (!save_area)
@@ -386,9 +386,45 @@
 }
 
 /*
+ * Initialize vxrs high note (full 128 bit VX registers 16-31)
+ */
+static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
+{
+	return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
+		       16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vxrs low note (lower halves of VX registers 0-15)
+ */
+static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
+{
+	Elf64_Nhdr *note;
+	u64 len;
+	int i;
+
+	note = (Elf64_Nhdr *)ptr;
+	note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
+	note->n_descsz = 16 * 8;
+	note->n_type = NT_S390_VXRS_LOW;
+	len = sizeof(Elf64_Nhdr);
+
+	memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz);
+	len = roundup(len + note->n_namesz, 4);
+
+	ptr += len;
+	/* Copy lower halves of SIMD registers 0-15 */
+	for (i = 0; i < 16; i++) {
+		memcpy(ptr, &vx_regs[i], 8);
+		ptr += 8;
+	}
+	return ptr;
+}
+
+/*
  * Fill ELF notes for one CPU with save area registers
  */
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa)
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs)
 {
 	ptr = nt_prstatus(ptr, sa);
 	ptr = nt_fpregset(ptr, sa);
@@ -397,6 +433,10 @@
 	ptr = nt_s390_tod_preg(ptr, sa);
 	ptr = nt_s390_ctrs(ptr, sa);
 	ptr = nt_s390_prefix(ptr, sa);
+	if (MACHINE_HAS_VX && vx_regs) {
+		ptr = nt_s390_vx_low(ptr, vx_regs);
+		ptr = nt_s390_vx_high(ptr, vx_regs);
+	}
 	return ptr;
 }
 
@@ -484,7 +524,7 @@
 	int i, cpus = 0;
 
 	for (i = 0; i < dump_save_areas.count; i++) {
-		if (dump_save_areas.areas[i]->pref_reg == 0)
+		if (dump_save_areas.areas[i]->sa.pref_reg == 0)
 			continue;
 		cpus++;
 	}
@@ -530,17 +570,17 @@
  */
 static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
 {
-	struct save_area *sa;
+	struct save_area_ext *sa_ext;
 	void *ptr_start = ptr;
 	int i;
 
 	ptr = nt_prpsinfo(ptr);
 
 	for (i = 0; i < dump_save_areas.count; i++) {
-		sa = dump_save_areas.areas[i];
-		if (sa->pref_reg == 0)
+		sa_ext = dump_save_areas.areas[i];
+		if (sa_ext->sa.pref_reg == 0)
 			continue;
-		ptr = fill_cpu_elf_notes(ptr, sa);
+		ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs);
 	}
 	ptr = nt_vmcoreinfo(ptr);
 	memset(phdr, 0, sizeof(*phdr));
@@ -581,7 +621,7 @@
 
 	mem_chunk_cnt = get_mem_chunk_cnt();
 
-	alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
+	alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 +
 		mem_chunk_cnt * sizeof(Elf64_Phdr);
 	hdr = kzalloc_panic(alloc_size);
 	/* Init elf header */
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 719e27b..4685337 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -25,6 +25,7 @@
 #include <asm/elf.h>
 #include <asm/asm-offsets.h>
 #include <asm/os_info.h>
+#include <asm/switch_to.h>
 
 typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
 
@@ -43,7 +44,7 @@
 
 	memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
 	ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
-	ptr = fill_cpu_elf_notes(ptr, sa);
+	ptr = fill_cpu_elf_notes(ptr, sa, NULL);
 	memset(ptr, 0, sizeof(struct elf_note));
 }
 
@@ -53,8 +54,11 @@
 static void setup_regs(void)
 {
 	unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
+	struct _lowcore *lc;
 	int cpu, this_cpu;
 
+	/* Get lowcore pointer from store status of this CPU (absolute zero) */
+	lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area;
 	this_cpu = smp_find_processor_id(stap());
 	add_elf_notes(this_cpu);
 	for_each_online_cpu(cpu) {
@@ -64,6 +68,8 @@
 			continue;
 		add_elf_notes(cpu);
 	}
+	if (MACHINE_HAS_VX)
+		save_vx_regs_safe((void *) lc->vector_save_area_addr);
 	/* Copy dump CPU store status info to absolute zero */
 	memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 13cae5b..6fd9e60 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -83,7 +83,8 @@
 /*
  * Signal processor helper functions.
  */
-static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm,
+				    u32 *status)
 {
 	int cc;
 
@@ -515,35 +516,53 @@
 static void __init smp_get_save_area(int cpu, u16 address)
 {
 	void *lc = pcpu_devices[0].lowcore;
-	struct save_area *save_area;
+	struct save_area_ext *sa_ext;
+	unsigned long vx_sa;
 
 	if (is_kdump_kernel())
 		return;
 	if (!OLDMEM_BASE && (address == boot_cpu_address ||
 			     ipl_info.type != IPL_TYPE_FCP_DUMP))
 		return;
-	save_area = dump_save_area_create(cpu);
-	if (!save_area)
+	sa_ext = dump_save_area_create(cpu);
+	if (!sa_ext)
 		panic("could not allocate memory for save area\n");
 	if (address == boot_cpu_address) {
 		/* Copy the registers of the boot cpu. */
-		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
+		copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
 				 SAVE_AREA_BASE - PAGE_SIZE, 0);
+		if (MACHINE_HAS_VX)
+			save_vx_regs_safe(sa_ext->vx_regs);
 		return;
 	}
 	/* Get the registers of a non-boot cpu. */
 	__pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
-	memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
+	memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa));
+	if (!MACHINE_HAS_VX)
+		return;
+	/* Get the VX registers */
+	vx_sa = __get_free_page(GFP_KERNEL);
+	if (!vx_sa)
+		panic("could not allocate memory for VX save area\n");
+	__pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
+	memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
+	free_page(vx_sa);
 }
 
 int smp_store_status(int cpu)
 {
+	unsigned long vx_sa;
 	struct pcpu *pcpu;
 
 	pcpu = pcpu_devices + cpu;
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
 			      0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
 		return -EIO;
+	if (!MACHINE_HAS_VX)
+		return 0;
+	vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+	__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+			  vx_sa, NULL);
 	return 0;
 }
 
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 1884653..efcf484 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -28,6 +28,7 @@
 #include <asm/processor.h>
 #include <asm/irqflags.h>
 #include <asm/checksum.h>
+#include <asm/switch_to.h>
 #include "sclp.h"
 
 #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x)
@@ -149,18 +150,21 @@
 
 static int __init init_cpu_info(enum arch_id arch)
 {
-	struct save_area *sa;
+	struct save_area_ext *sa_ext;
 
 	/* get info for boot cpu from lowcore, stored in the HSA */
 
-	sa = dump_save_area_create(0);
-	if (!sa)
+	sa_ext = dump_save_area_create(0);
+	if (!sa_ext)
 		return -ENOMEM;
-	if (memcpy_hsa_kernel(sa, sys_info.sa_base, sys_info.sa_size) < 0) {
+	if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base,
+			      sys_info.sa_size) < 0) {
 		TRACE("could not copy from HSA\n");
-		kfree(sa);
+		kfree(sa_ext);
 		return -EIO;
 	}
+	if (MACHINE_HAS_VX)
+		save_vx_regs_safe(sa_ext->vx_regs);
 	return 0;
 }
 
@@ -258,7 +262,7 @@
 		unsigned long sa_start, sa_end; /* save area range */
 		unsigned long prefix;
 		unsigned long sa_off, len, buf_off;
-		struct save_area *save_area = dump_save_areas.areas[i];
+		struct save_area *save_area = &dump_save_areas.areas[i]->sa;
 
 		prefix = save_area->pref_reg;
 		sa_start = prefix + sys_info.sa_base;
@@ -612,7 +616,7 @@
 	hdr->tod = get_tod_clock();
 	get_cpu_id(&hdr->cpu_id);
 	for (i = 0; i < dump_save_areas.count; i++) {
-		prefix = dump_save_areas.areas[i]->pref_reg;
+		prefix = dump_save_areas.areas[i]->sa.pref_reg;
 		hdr->real_cpu_cnt++;
 		if (!prefix)
 			continue;