@@ -3200,9 +3200,6 @@ struct softnet_data {
/* stats */
unsigned int processed;
unsigned int time_squeeze;
-#ifdef CONFIG_RPS
- struct softnet_data *rps_ipi_list;
-#endif
bool in_net_rx_action;
bool in_napi_threaded_poll;
@@ -3231,12 +3228,8 @@ struct softnet_data {
unsigned int input_queue_head ____cacheline_aligned_in_smp;
/* Elements below can be accessed between CPUs for RPS/RFS */
- call_single_data_t csd ____cacheline_aligned_in_smp;
- struct softnet_data *rps_ipi_next;
- unsigned int cpu;
unsigned int input_queue_tail;
#endif
- unsigned int received_rps;
unsigned int dropped;
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
@@ -3244,9 +3237,7 @@ struct softnet_data {
/* Another possibly contended cache line */
spinlock_t defer_lock ____cacheline_aligned_in_smp;
int defer_count;
- int defer_ipi_scheduled;
struct sk_buff *defer_list;
- call_single_data_t defer_csd;
};
static inline void input_queue_head_incr(struct softnet_data *sd)
@@ -153,6 +153,7 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <linux/smpboot.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -4409,6 +4410,7 @@ EXPORT_SYMBOL(__dev_direct_xmit);
/*************************************************************************
* Receiver routines
*************************************************************************/
+static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
@@ -4441,6 +4443,9 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/
thread = READ_ONCE(napi->thread);
if (thread) {
+ if (thread == raw_cpu_read(backlog_napi))
+ goto use_local_napi;
+
/* Avoid doing set_bit() if the thread is in
* INTERRUPTIBLE state, cause napi_thread_wait()
* makes sure to proceed with napi polling
@@ -4453,6 +4458,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
}
}
+use_local_napi:
list_add_tail(&napi->poll_list, &sd->poll_list);
WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
@@ -4661,57 +4667,8 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
EXPORT_SYMBOL(rps_may_expire_flow);
#endif /* CONFIG_RFS_ACCEL */
-
-/* Called from hardirq (IPI) context */
-static void rps_trigger_softirq(void *data)
-{
- struct softnet_data *sd = data;
-
- ____napi_schedule(sd, &sd->backlog);
- sd->received_rps++;
-}
-
#endif /* CONFIG_RPS */
-/* Called from hardirq (IPI) context */
-static void trigger_rx_softirq(void *data)
-{
- struct softnet_data *sd = data;
-
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- smp_store_release(&sd->defer_ipi_scheduled, 0);
-}
-
-/*
- * After we queued a packet into sd->input_pkt_queue,
- * we need to make sure this queue is serviced soon.
- *
- * - If this is another cpu queue, link it to our rps_ipi_list,
- * and make sure we will process rps_ipi_list from net_rx_action().
- *
- * - If this is our own queue, NAPI schedule our backlog.
- * Note that this also raises NET_RX_SOFTIRQ.
- */
-static void napi_schedule_rps(struct softnet_data *sd)
-{
- struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
-
-#ifdef CONFIG_RPS
- if (sd != mysd) {
- sd->rps_ipi_next = mysd->rps_ipi_list;
- mysd->rps_ipi_list = sd;
-
- /* If not called from net_rx_action() or napi_threaded_poll()
- * we have to raise NET_RX_SOFTIRQ.
- */
- if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- return;
- }
-#endif /* CONFIG_RPS */
- __napi_schedule_irqoff(&mysd->backlog);
-}
-
#ifdef CONFIG_NET_FLOW_LIMIT
int netdev_flow_limit_table_len __read_mostly = (1 << 12);
#endif
@@ -4784,7 +4741,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
* We can use non atomic operation since we own the queue lock
*/
if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
- napi_schedule_rps(sd);
+ __napi_schedule_irqoff(&sd->backlog);
goto enqueue;
}
reason = SKB_DROP_REASON_CPU_BACKLOG;
@@ -5899,63 +5856,12 @@ static void flush_all_backlogs(void)
cpus_read_unlock();
}
-static void net_rps_send_ipi(struct softnet_data *remsd)
-{
-#ifdef CONFIG_RPS
- while (remsd) {
- struct softnet_data *next = remsd->rps_ipi_next;
-
- if (cpu_online(remsd->cpu))
- smp_call_function_single_async(remsd->cpu, &remsd->csd);
- remsd = next;
- }
-#endif
-}
-
-/*
- * net_rps_action_and_irq_enable sends any pending IPI's for rps.
- * Note: called with local irq disabled, but exits with local irq enabled.
- */
-static void net_rps_action_and_irq_enable(struct softnet_data *sd)
-{
-#ifdef CONFIG_RPS
- struct softnet_data *remsd = sd->rps_ipi_list;
-
- if (remsd) {
- sd->rps_ipi_list = NULL;
-
- local_irq_enable();
-
- /* Send pending IPI's to kick RPS processing on remote cpus. */
- net_rps_send_ipi(remsd);
- } else
-#endif
- local_irq_enable();
-}
-
-static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
-{
-#ifdef CONFIG_RPS
- return sd->rps_ipi_list != NULL;
-#else
- return false;
-#endif
-}
-
static int process_backlog(struct napi_struct *napi, int quota)
{
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
bool again = true;
int work = 0;
- /* Check if we have pending ipi, its better to send them now,
- * not waiting net_rx_action() end.
- */
- if (sd_has_rps_ipi_waiting(sd)) {
- local_irq_disable();
- net_rps_action_and_irq_enable(sd);
- }
-
napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -5980,7 +5886,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
* We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
- napi->state = 0;
+ napi->state = NAPIF_STATE_THREADED;
again = false;
} else {
skb_queue_splice_tail_init(&sd->input_pkt_queue,
@@ -6650,40 +6556,42 @@ static void skb_defer_free_flush(struct softnet_data *sd)
}
}
+/*
+ * Poll @napi from thread context until __napi_poll() stops requesting
+ * a repoll.
+ *
+ * Every pass runs with bottom halves disabled, marks this CPU's
+ * softnet_data as being inside a threaded poll while __napi_poll()
+ * executes under the netpoll poll lock, and afterwards calls
+ * skb_defer_free_flush() on that same softnet_data before bottom
+ * halves are enabled again.
+ */
+static void napi_threaded_poll_loop(struct napi_struct *napi)
+{
+	bool repoll;
+
+	do {
+		struct softnet_data *sd;
+		void *have;
+
+		/* __napi_poll() only ever sets the flag, so clear it per pass. */
+		repoll = false;
+
+		local_bh_disable();
+		sd = this_cpu_ptr(&softnet_data);
+		sd->in_napi_threaded_poll = true;
+
+		have = netpoll_poll_lock(napi);
+		__napi_poll(napi, &repoll);
+		netpoll_poll_unlock(have);
+
+		sd->in_napi_threaded_poll = false;
+		barrier();
+
+		skb_defer_free_flush(sd);
+		local_bh_enable();
+
+		/* Offer a reschedule point between two consecutive passes. */
+		if (repoll)
+			cond_resched();
+	} while (repoll);
+}
+
static int napi_threaded_poll(void *data)
{
struct napi_struct *napi = data;
- struct softnet_data *sd;
- void *have;
- while (!napi_thread_wait(napi)) {
- for (;;) {
- bool repoll = false;
+ while (!napi_thread_wait(napi))
+ napi_threaded_poll_loop(napi);
- local_bh_disable();
- sd = this_cpu_ptr(&softnet_data);
- sd->in_napi_threaded_poll = true;
-
- have = netpoll_poll_lock(napi);
- __napi_poll(napi, &repoll);
- netpoll_poll_unlock(have);
-
- sd->in_napi_threaded_poll = false;
- barrier();
-
- if (sd_has_rps_ipi_waiting(sd)) {
- local_irq_disable();
- net_rps_action_and_irq_enable(sd);
- }
- skb_defer_free_flush(sd);
- local_bh_enable();
-
- if (!repoll)
- break;
-
- cond_resched();
- }
- }
return 0;
}
@@ -6717,8 +6625,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
*/
if (!list_empty(&sd->poll_list))
goto start;
- if (!sd_has_rps_ipi_waiting(sd))
- goto end;
}
break;
}
@@ -6747,8 +6653,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
else
sd->in_net_rx_action = false;
- net_rps_action_and_irq_enable(sd);
-end:;
+ local_irq_enable();
}
struct netdev_adjacent {
@@ -11187,7 +11092,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
struct sk_buff **list_skb;
struct sk_buff *skb;
unsigned int cpu;
- struct softnet_data *sd, *oldsd, *remsd = NULL;
+ struct softnet_data *sd, *oldsd;
local_irq_disable();
cpu = smp_processor_id();
@@ -11220,7 +11125,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
list_del_init(&napi->poll_list);
if (napi->poll == process_backlog)
- napi->state = 0;
+ napi->state = NAPIF_STATE_THREADED;
else
____napi_schedule(sd, napi);
}
@@ -11228,13 +11133,6 @@ static int dev_cpu_dead(unsigned int oldcpu)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
-#ifdef CONFIG_RPS
- remsd = oldsd->rps_ipi_list;
- oldsd->rps_ipi_list = NULL;
-#endif
- /* send out pending IPI's on offline CPU */
- net_rps_send_ipi(remsd);
-
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx(skb);
@@ -11487,6 +11385,49 @@ static struct pernet_operations __net_initdata default_device_ops = {
*
*/
+/*
+ * ->thread_should_run() callback of backlog_threads.
+ *
+ * Returns non-zero when the softnet_data of @cpu has a non-NULL
+ * defer_list or when its backlog NAPI has NAPI_STATE_SCHED set.
+ */
+static int backlog_napi_should_run(unsigned int cpu)
+{
+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+
+	return READ_ONCE(sd->defer_list) ? 1 :
+	       test_bit(NAPI_STATE_SCHED, &sd->backlog.state);
+}
+
+/*
+ * ->thread_fn() callback of backlog_threads: hand the backlog NAPI of
+ * @cpu to napi_threaded_poll_loop().
+ */
+static void run_backlog_napi(unsigned int cpu)
+{
+	struct napi_struct *napi = &per_cpu_ptr(&softnet_data, cpu)->backlog;
+
+	napi_threaded_poll_loop(napi);
+}
+
+/*
+ * ->setup() callback of backlog_threads: attach a thread to the backlog
+ * NAPI of @cpu and set NAPI_STATE_THREADED on it.
+ *
+ * The task pointer is read from the backlog_napi slot of the CPU this
+ * code executes on, not from the slot indexed by @cpu.
+ * NOTE(review): presumably the callback always runs on @cpu - confirm.
+ */
+static void backlog_napi_setup(unsigned int cpu)
+{
+	struct napi_struct *napi = &per_cpu_ptr(&softnet_data, cpu)->backlog;
+
+	napi->thread = this_cpu_read(backlog_napi);
+	set_bit(NAPI_STATE_THREADED, &napi->state);
+}
+
+/*
+ * ->park() callback of backlog_threads: run skb_defer_free_flush() on
+ * the softnet_data of @cpu.
+ */
+static void backlog_napi_park(unsigned int cpu)
+{
+	skb_defer_free_flush(per_cpu_ptr(&softnet_data, cpu));
+}
+
+static struct smp_hotplug_thread backlog_threads = { /* per-CPU backlog NAPI threads; registered via smpboot_register_percpu_thread() in net_dev_init() */
+.store = &backlog_napi, /* per-CPU struct task_struct pointer; backlog_napi_setup() copies it into napi->thread */
+.thread_should_run = backlog_napi_should_run, /* non-zero if defer_list is set or the backlog NAPI has NAPI_STATE_SCHED */
+.thread_fn = run_backlog_napi, /* runs napi_threaded_poll_loop() on the CPU's backlog NAPI */
+.thread_comm = "backlog_napi/%u", /* thread name template; %u is presumably the CPU number - confirm */
+.setup = backlog_napi_setup, /* sets napi->thread and NAPI_STATE_THREADED on the backlog NAPI */
+.park = backlog_napi_park, /* calls skb_defer_free_flush() on the CPU's softnet_data */
+};
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
@@ -11527,17 +11468,14 @@ static int __init net_dev_init(void)
#endif
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue_tailp = &sd->output_queue;
-#ifdef CONFIG_RPS
- INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
- sd->cpu = i;
-#endif
- INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
spin_lock_init(&sd->defer_lock);
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+ INIT_LIST_HEAD(&sd->backlog.poll_list);
}
+ smpboot_register_percpu_thread(&backlog_threads);
dev_boot_phase = 0;
@@ -180,7 +180,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
- sd->received_rps, flow_limit_count,
+ 0 /* was received_rps */, flow_limit_count,
input_qlen + process_qlen, (int)seq->index,
input_qlen, process_qlen);
return 0;
@@ -6844,8 +6844,8 @@ nodefer: __kfree_skb(skb);
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
* if we are unlucky enough (this seems very unlikely).
*/
- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
- smp_call_function_single_async(cpu, &sd->defer_csd);
+ if (unlikely(kick))
+ __napi_schedule(&sd->backlog);
}
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,