Here is the latest version of the CPU race patch.  Pretty sure you asked
me for this.  Somewhere, sometime.  (100-cpu_race_R3)

-apw

During the shutdown process, cpus are taken offline.  If this overlaps
with an ongoing TLB flush, we can attempt to flush a cpu which is
already offline, which triggers a BUG_ON() in flush_tlb_others().  This
occurs because we do not remove an outgoing cpu from the VM state.

This patch does two things.  First, it masks the cpu mask passed for
the flush against the current online cpu map, preventing us from
flushing cpus which are no longer online.  Second, it ensures that the
cpus found to be online at this point cannot go completely offline
before the TLB flush is complete.

diff -upN reference/arch/i386/kernel/smp.c current/arch/i386/kernel/smp.c
--- reference/arch/i386/kernel/smp.c	2004-05-09 13:44:04.000000000 -0700
+++ current/arch/i386/kernel/smp.c	2004-05-09 13:44:06.000000000 -0700
@@ -357,8 +357,6 @@ static void flush_tlb_others(cpumask_t c
 	 */
 	BUG_ON(cpus_empty(cpumask));
 
-	cpus_and(tmp, cpumask, cpu_online_map);
-	BUG_ON(!cpus_equal(cpumask, tmp));
 	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
 	BUG_ON(!mm);
 
@@ -369,16 +367,26 @@ static void flush_tlb_others(cpumask_t c
 	 * detected by the NMI watchdog.
 	 */
 	spin_lock(&tlbstate_lock);
-	
+
+	/*
+	 * Subtle: mask the request mask with the currently online cpus.
+	 * Sample this under the lock; cpus in the middle of going offline
+	 * will wait until there is no one in this critical section before
+	 * disabling IPI handling.
+	 */
+	cpus_and(tmp, cpumask, cpu_online_map);
+	if (cpus_empty(tmp))
+		goto out_unlock;
+
 	flush_mm = mm;
 	flush_va = va;
 #if NR_CPUS <= BITS_PER_LONG
-	atomic_set_mask(cpumask, &flush_cpumask);
+	atomic_set_mask(tmp, &flush_cpumask);
 #else
 	{
 		int k;
 		unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
-		unsigned long *cpu_mask = (unsigned long *)&cpumask;
+		unsigned long *cpu_mask = (unsigned long *)&tmp;
 		for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
 			atomic_set_mask(cpu_mask[k], &flush_mask[k]);
 	}
@@ -387,7 +395,7 @@ static void flush_tlb_others(cpumask_t c
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(tmp, INVALIDATE_TLB_VECTOR);
 
 	while (!cpus_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
@@ -395,6 +403,7 @@ static void flush_tlb_others(cpumask_t c
 
 	flush_mm = NULL;
 	flush_va = 0;
+out_unlock:
 	spin_unlock(&tlbstate_lock);
 }
 
@@ -514,10 +523,7 @@ int smp_call_function (void (*func) (voi
 */
 {
 	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
-
-	if (!cpus)
-		return 0;
+	int cpus;
 
 	data.func = func;
 	data.info = info;
@@ -527,6 +533,15 @@ int smp_call_function (void (*func) (voi
 	atomic_set(&data.finished, 0);
 
 	spin_lock(&call_lock);
+
+	/* Subtle: get the current number of online cpus.
+	 * Sample this under the lock; cpus in the middle of going offline
+	 * will wait until there is no one in this critical section before
+	 * disabling IPI handling. */
+	cpus = num_online_cpus()-1;
+	if (!cpus)
+		goto out_unlock;
+
 	call_data = &data;
 	mb();
 	
@@ -540,6 +555,7 @@ int smp_call_function (void (*func) (voi
 	if (wait)
 		while (atomic_read(&data.finished) != cpus)
 			barrier();
+out_unlock:
 	spin_unlock(&call_lock);
 
 	return 0;
@@ -551,6 +567,20 @@ static void stop_this_cpu (void * dummy)
 	 * Remove this CPU:
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
+
+	/* Subtle: IPI users assume that they will be able to get IPIs
+	 * through to the cpus listed in cpu_online_map.  To ensure this
+	 * we add the requirement that they check cpu_online_map within
+	 * the IPI critical sections.  Here we remove ourselves from the
+	 * map, then ensure that all other cpus have left the relevant
+	 * critical sections since the change.  We do this by acquiring
+	 * the relevant section locks; if we have them, no one else is in
+	 * them.  Once this is done we can go offline. */
+	spin_lock(&tlbstate_lock);
+	spin_unlock(&tlbstate_lock);
+	spin_lock(&call_lock);
+	spin_unlock(&call_lock);
+
 	local_irq_disable();
 	disable_local_APIC();
 	if (cpu_data[smp_processor_id()].hlt_works_ok)
diff -upN reference/arch/x86_64/kernel/smp.c current/arch/x86_64/kernel/smp.c
--- reference/arch/x86_64/kernel/smp.c	2003-11-24 16:12:28.000000000 -0800
+++ current/arch/x86_64/kernel/smp.c	2004-05-09 13:44:06.000000000 -0700
@@ -243,8 +243,6 @@ static void flush_tlb_others(cpumask_t c
 	 * - mask must exist :)
 	 */
 	BUG_ON(cpus_empty(cpumask));
-	cpus_and(tmp, cpumask, cpu_online_map);
-	BUG_ON(!cpus_equal(tmp, cpumask));
 	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
 	if (!mm)
 		BUG();
@@ -257,21 +255,30 @@ static void flush_tlb_others(cpumask_t c
 	 */
 	spin_lock(&tlbstate_lock);
 	
+	/* Subtle: mask the request mask with the currently online cpus.
+	 * Sample this under the lock; cpus in the middle of going offline
+	 * will wait until there is no one in this critical section before
+	 * disabling IPI handling. */
+	cpus_and(tmp, cpumask, cpu_online_map);
+	if (cpus_empty(tmp))
+		goto out_unlock;
+
 	flush_mm = mm;
 	flush_va = va;
-	cpus_or(flush_cpumask, cpumask, flush_cpumask);
+	cpus_or(flush_cpumask, tmp, flush_cpumask);
 
 	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(tmp, INVALIDATE_TLB_VECTOR);
 
 	while (!cpus_empty(flush_cpumask))
 		mb();	/* nothing. lockup detection does not belong here */;
 
 	flush_mm = NULL;
 	flush_va = 0;
+out_unlock:
 	spin_unlock(&tlbstate_lock);
 }
 
@@ -399,10 +406,7 @@ int smp_call_function (void (*func) (voi
 */
 {
 	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
-
-	if (!cpus)
-		return 0;
+	int cpus;
 
 	data.func = func;
 	data.info = info;
@@ -412,6 +416,15 @@ int smp_call_function (void (*func) (voi
 	atomic_set(&data.finished, 0);
 
 	spin_lock(&call_lock);
+
+	/* Subtle: get the current number of online cpus.
+	 * Sample this under the lock; cpus in the middle of going offline
+	 * will wait until there is no one in this critical section before
+	 * disabling IPI handling. */
+	cpus = num_online_cpus()-1;
+	if (!cpus)
+		goto out_unlock;
+
 	call_data = &data;
 	wmb();
 	/* Send a message to all other CPUs and wait for them to respond */
@@ -424,6 +437,7 @@ int smp_call_function (void (*func) (voi
 	if (wait)
 		while (atomic_read(&data.finished) != cpus)
 			barrier();
+out_unlock:
 	spin_unlock(&call_lock);
 
 	return 0;
@@ -435,6 +449,20 @@ void smp_stop_cpu(void)
 	 * Remove this CPU:
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
+
+	/* Subtle: IPI users assume that they will be able to get IPIs
+	 * through to the cpus listed in cpu_online_map.  To ensure this
+	 * we add the requirement that they check cpu_online_map within
+	 * the IPI critical sections.  Here we remove ourselves from the
+	 * map, then ensure that all other cpus have left the relevant
+	 * critical sections since the change.  We do this by acquiring
+	 * the relevant section locks; if we have them, no one else is in
+	 * them.  Once this is done we can go offline. */
+	spin_lock(&tlbstate_lock);
+	spin_unlock(&tlbstate_lock);
+	spin_lock(&call_lock);
+	spin_unlock(&call_lock);
+
 	local_irq_disable();
 	disable_local_APIC();
 	local_irq_enable();
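
Not part of the patch, but for anyone reviewing the locking: below is a
minimal, single-threaded user-space sketch of the handshake both
architectures now rely on.  The names are made up for illustration:
send_ipis() stands in for flush_tlb_others()/smp_call_function(),
go_offline() for stop_this_cpu()/smp_stop_cpu(), online_mask for
cpu_online_map and ipi_lock for tlbstate_lock/call_lock.  It is not
kernel code.

/*
 * ipi-handshake.c: sketch of the quiesce-by-lock-handshake pattern.
 * Build with: cc -pthread ipi-handshake.c
 */
#include <pthread.h>
#include <stdio.h>

#define NCPUS 2

static pthread_mutex_t ipi_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long online_mask = 0x3;	/* cpus 0 and 1 start online */
static int ipi_pending[NCPUS];		/* stand-in for a real IPI */

/* Sender side: sample the online map *inside* the critical section. */
static void send_ipis(unsigned long request_mask)
{
	unsigned long tmp;
	int cpu;

	pthread_mutex_lock(&ipi_lock);

	tmp = request_mask & online_mask;	/* drop cpus gone offline */
	if (!tmp)
		goto out_unlock;

	for (cpu = 0; cpu < NCPUS; cpu++)
		if (tmp & (1UL << cpu))
			ipi_pending[cpu] = 1;	/* "deliver" the IPI */
out_unlock:
	pthread_mutex_unlock(&ipi_lock);
}

/* Offline side: leave the map, then drain senders with a lock handshake. */
static void go_offline(int cpu)
{
	online_mask &= ~(1UL << cpu);

	/*
	 * Once we can take the lock, nobody is still inside the sender
	 * critical section with a sample that included this cpu, so it
	 * is now safe to stop handling IPIs.
	 */
	pthread_mutex_lock(&ipi_lock);
	pthread_mutex_unlock(&ipi_lock);

	printf("cpu%d offline, last pending ipi = %d\n", cpu, ipi_pending[cpu]);
}

int main(void)
{
	send_ipis(1UL << 1);	/* cpu1 still online: IPI is delivered  */
	go_offline(1);		/* cpu1 leaves and drains the senders   */
	send_ipis(1UL << 1);	/* cpu1 now masked out: nothing is sent */
	return 0;
}

The interesting part is the lock/unlock pair in go_offline(): any sender
that sampled the mask while the cpu was still online has already dropped
the lock by the time the departing cpu can take it, and every later
sender sees the updated mask.  In the kernel the map update and ordering
are done by cpu_clear() and the spinlocks themselves; the demo keeps it
single-threaded so it stays deterministic.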