LoongArch: Always enumerate MADT and setup logical-physical CPU mapping

Some drivers want to use cpu_logical_map(), early_cpu_to_node() and some
other CPU mapping APIs, even if we use "nr_cpus=1" to hard limit the CPU
number. This is strongly required for the multi-bridges machines.

Currently, we stop parsing the MADT if the nr_cpus limit is reached, but
to achieve the above goal we should always enumerate the MADT table and
setup logical-physical CPU mapping whether there is a nr_cpus limit.

Rework the MADT enumeration:

1. Define a flag "cpu_enumerated" to distinguish the first enumeration
   (cpu_enumerated=0) and the physical hotplug case (cpu_enumerated=1)
   for set_processor_mask().

2. If cpu_enumerated=0, stop parsing only when NR_CPUS limit is reached,
   so we can setup logical-physical CPU mapping; if cpu_enumerated=1,
   stop parsing when nr_cpu_ids limit is reached, so we can avoid some
   runtime bugs. Once logical-physical CPU mapping is setup, we will let
   cpu_enumerated=1.

3. Use find_first_zero_bit() instead of cpumask_next_zero() to find the
   next zero bit (free logical CPU id) in the cpu_present_mask, because
   cpumask_next_zero() will stop at nr_cpu_ids.

4. Only touch cpu_possible_mask if cpu_enumerated=0, this is in order to
   avoid some potential crashes, because cpu_possible_mask is marked as
   __ro_after_init.

5. In prefill_possible_map(), clear cpu_present_mask bits greater than
   nr_cpu_ids, in order to avoid a CPU be "present" but not "possible".

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 5cf59c6..929a497 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -57,15 +57,22 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
 		return ioremap_cache(phys, size);
 }
 
+static int cpu_enumerated = 0;
+
 #ifdef CONFIG_SMP
 static int set_processor_mask(u32 id, u32 flags)
 {
-
+	int nr_cpus;
 	int cpu, cpuid = id;
 
-	if (num_processors >= nr_cpu_ids) {
-		pr_warn(PREFIX "nr_cpus/possible_cpus limit of %i reached."
-			" processor 0x%x ignored.\n", nr_cpu_ids, cpuid);
+	if (!cpu_enumerated)
+		nr_cpus = NR_CPUS;
+	else
+		nr_cpus = nr_cpu_ids;
+
+	if (num_processors >= nr_cpus) {
+		pr_warn(PREFIX "nr_cpus limit of %i reached."
+			" processor 0x%x ignored.\n", nr_cpus, cpuid);
 
 		return -ENODEV;
 
@@ -73,11 +80,13 @@ static int set_processor_mask(u32 id, u32 flags)
 	if (cpuid == loongson_sysconf.boot_cpu_id)
 		cpu = 0;
 	else
-		cpu = cpumask_next_zero(-1, cpu_present_mask);
+		cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
+
+	if (!cpu_enumerated)
+		set_cpu_possible(cpu, true);
 
 	if (flags & ACPI_MADT_ENABLED) {
 		num_processors++;
-		set_cpu_possible(cpu, true);
 		set_cpu_present(cpu, true);
 		__cpu_number_map[cpuid] = cpu;
 		__cpu_logical_map[cpu] = cpuid;
@@ -138,6 +147,7 @@ static void __init acpi_process_madt(void)
 	acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC,
 			acpi_parse_eio_master, MAX_IO_PICS);
 
+	cpu_enumerated = 1;
 	loongson_sysconf.nr_cpus = num_processors;
 }
 
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 3d048f1..0f0740f 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -576,8 +576,10 @@ static void __init prefill_possible_map(void)
 
 	for (i = 0; i < possible; i++)
 		set_cpu_possible(i, true);
-	for (; i < NR_CPUS; i++)
+	for (; i < NR_CPUS; i++) {
+		set_cpu_present(i, false);
 		set_cpu_possible(i, false);
+	}
 
 	set_nr_cpu_ids(possible);
 }
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 1436d24..03b2b76 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -271,11 +271,10 @@ static void __init fdt_smp_setup(void)
 		if (cpuid >= nr_cpu_ids)
 			continue;
 
-		if (cpuid == loongson_sysconf.boot_cpu_id) {
+		if (cpuid == loongson_sysconf.boot_cpu_id)
 			cpu = 0;
-		} else {
-			cpu = cpumask_next_zero(-1, cpu_present_mask);
-		}
+		else
+			cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
 
 		num_processors++;
 		set_cpu_possible(cpu, true);