mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue
The TXQ intermediate queues can cause packet reordering when more than
one flow is active to a single station. Since some of the wifi-specific
packet handling (notably sequence number and encryption handling) is
sensitive to re-ordering, things break if they are applied before the
TXQ.
This splits up the TX handlers and fast_xmit logic into two parts: An
early part and a late part. The former is applied before TXQ enqueue,
and the latter after dequeue. The non-TXQ path just applies both parts
at once.
Because fragments shouldn't be split up or reordered, the fragmentation
handler is run after dequeue. Any fragments are then kept in the TXQ and
on subsequent dequeues they take precedence over dequeueing from the FQ
structure.
This approach avoids having to scatter special cases all over the place
for when TXQ is enabled, at the cost of making the fast_xmit and TX
handler code slightly more complex.
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
[fix a few code-style nits, make ieee80211_xmit_fast_finish void,
remove a useless txq->sta check]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 378a7a6..0ea1b0d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -853,8 +853,7 @@
tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
tx->sta->tx_stats.msdu[tid]++;
- if (!tx->sta->sta.txq[0])
- hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
+ hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
return TX_CONTINUE;
}
@@ -1404,6 +1403,7 @@
fq_flow_init(&txqi->def_flow);
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
+ __skb_queue_head_init(&txqi->frags);
txqi->txq.vif = &sdata->vif;
@@ -1426,6 +1426,7 @@
struct fq_tin *tin = &txqi->tin;
fq_tin_reset(fq, tin, fq_skb_free_func);
+ ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
}
int ieee80211_txq_setup_flows(struct ieee80211_local *local)
@@ -1495,6 +1496,47 @@
spin_unlock_bh(&fq->lock);
}
+static bool ieee80211_queue_skb(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct fq *fq = &local->fq;
+ struct ieee80211_vif *vif;
+ struct txq_info *txqi;
+ struct ieee80211_sta *pubsta;
+
+ if (!local->ops->wake_tx_queue ||
+ sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ return false;
+
+ if (sta && sta->uploaded)
+ pubsta = &sta->sta;
+ else
+ pubsta = NULL;
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ sdata = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+
+ vif = &sdata->vif;
+ txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+
+ if (!txqi)
+ return false;
+
+ info->control.vif = vif;
+
+ spin_lock_bh(&fq->lock);
+ ieee80211_txq_enqueue(local, txqi, skb);
+ spin_unlock_bh(&fq->lock);
+
+ drv_wake_tx_queue(local, txqi);
+
+ return true;
+}
+
static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
@@ -1502,9 +1544,7 @@
bool txpending)
{
struct ieee80211_tx_control control = {};
- struct fq *fq = &local->fq;
struct sk_buff *skb, *tmp;
- struct txq_info *txqi;
unsigned long flags;
skb_queue_walk_safe(skbs, skb, tmp) {
@@ -1519,21 +1559,6 @@
}
#endif
- txqi = ieee80211_get_txq(local, vif, sta, skb);
- if (txqi) {
- info->control.vif = vif;
-
- __skb_unlink(skb, skbs);
-
- spin_lock_bh(&fq->lock);
- ieee80211_txq_enqueue(local, txqi, skb);
- spin_unlock_bh(&fq->lock);
-
- drv_wake_tx_queue(local, txqi);
-
- continue;
- }
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
if (local->queue_stop_reasons[q] ||
(!txpending && !skb_queue_empty(&local->pending[q]))) {
@@ -1654,10 +1679,13 @@
/*
* Invoke TX handlers, return 0 on success and non-zero if the
* frame was dropped or queued.
+ *
+ * The handlers are split into an early and late part. The latter is everything
+ * that can be sensitive to reordering, and will be deferred to after packets
+ * are dequeued from the intermediate queues (when they are enabled).
*/
-static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
ieee80211_tx_result res = TX_DROP;
#define CALL_TXH(txh) \
@@ -1675,6 +1703,31 @@
if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
CALL_TXH(ieee80211_tx_h_rate_ctrl);
+ txh_done:
+ if (unlikely(res == TX_DROP)) {
+ I802_DEBUG_INC(tx->local->tx_handlers_drop);
+ if (tx->skb)
+ ieee80211_free_txskb(&tx->local->hw, tx->skb);
+ else
+ ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
+ return -1;
+ } else if (unlikely(res == TX_QUEUED)) {
+ I802_DEBUG_INC(tx->local->tx_handlers_queued);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Late handlers can be called while the sta lock is held. Handlers that can
+ * cause packets to be generated will cause deadlock!
+ */
+static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ ieee80211_tx_result res = TX_CONTINUE;
+
if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
__skb_queue_tail(&tx->skbs, tx->skb);
tx->skb = NULL;
@@ -1707,6 +1760,15 @@
return 0;
}
+static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+{
+ int r = invoke_tx_handlers_early(tx);
+
+ if (r)
+ return r;
+ return invoke_tx_handlers_late(tx);
+}
+
bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
struct ieee80211_vif *vif, struct sk_buff *skb,
int band, struct ieee80211_sta **sta)
@@ -1781,7 +1843,13 @@
info->hw_queue =
sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
- if (!invoke_tx_handlers(&tx))
+ if (invoke_tx_handlers_early(&tx))
+ return false;
+
+ if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb))
+ return true;
+
+ if (!invoke_tx_handlers_late(&tx))
result = __ieee80211_tx(local, &tx.skbs, led_len,
tx.sta, txpending);
@@ -3125,8 +3193,71 @@
return ret;
}
+/*
+ * Can be called while the sta lock is held. Anything that can cause packets to
+ * be generated will cause deadlock!
+ */
+static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 pn_offs,
+ struct ieee80211_key *key,
+ struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+ u8 tid = IEEE80211_NUM_TIDS;
+
+ if (key)
+ info->control.hw_key = &key->conf;
+
+ ieee80211_tx_stats(skb->dev, skb->len);
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ *ieee80211_get_qos_ctl(hdr) = tid;
+ hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+ } else {
+ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+ hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+ sdata->sequence_number += 0x10;
+ }
+
+ if (skb_shinfo(skb)->gso_size)
+ sta->tx_stats.msdu[tid] +=
+ DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
+ else
+ sta->tx_stats.msdu[tid]++;
+
+ info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+ /* statistics normally done by ieee80211_tx_h_stats (but that
+ * has to consider fragmentation, so is more complex)
+ */
+ sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+
+ if (pn_offs) {
+ u64 pn;
+ u8 *crypto_hdr = skb->data + pn_offs;
+
+ switch (key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ pn = atomic64_inc_return(&key->conf.tx_pn);
+ crypto_hdr[0] = pn;
+ crypto_hdr[1] = pn >> 8;
+ crypto_hdr[4] = pn >> 16;
+ crypto_hdr[5] = pn >> 24;
+ crypto_hdr[6] = pn >> 32;
+ crypto_hdr[7] = pn >> 40;
+ break;
+ }
+ }
+}
+
static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
- struct net_device *dev, struct sta_info *sta,
+ struct sta_info *sta,
struct ieee80211_fast_tx *fast_tx,
struct sk_buff *skb)
{
@@ -3177,8 +3308,6 @@
return true;
}
- ieee80211_tx_stats(dev, skb->len + extra_head);
-
if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
return true;
@@ -3207,24 +3336,7 @@
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
IEEE80211_TX_CTL_DONTFRAG |
(tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
-
- if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
- *ieee80211_get_qos_ctl(hdr) = tid;
- if (!sta->sta.txq[0])
- hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
- } else {
- info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
- hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
- sdata->sequence_number += 0x10;
- }
-
- if (skb_shinfo(skb)->gso_size)
- sta->tx_stats.msdu[tid] +=
- DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
- else
- sta->tx_stats.msdu[tid]++;
-
- info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+ info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
__skb_queue_head_init(&tx.skbs);
@@ -3234,9 +3346,6 @@
tx.sta = sta;
tx.key = fast_tx->key;
- if (fast_tx->key)
- info->control.hw_key = &fast_tx->key->conf;
-
if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
tx.skb = skb;
r = ieee80211_tx_h_rate_ctrl(&tx);
@@ -3250,31 +3359,11 @@
}
}
- /* statistics normally done by ieee80211_tx_h_stats (but that
- * has to consider fragmentation, so is more complex)
- */
- sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+ if (ieee80211_queue_skb(local, sdata, sta, skb))
+ return true;
- if (fast_tx->pn_offs) {
- u64 pn;
- u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
-
- switch (fast_tx->key->conf.cipher) {
- case WLAN_CIPHER_SUITE_CCMP:
- case WLAN_CIPHER_SUITE_CCMP_256:
- case WLAN_CIPHER_SUITE_GCMP:
- case WLAN_CIPHER_SUITE_GCMP_256:
- pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
- crypto_hdr[0] = pn;
- crypto_hdr[1] = pn >> 8;
- crypto_hdr[4] = pn >> 16;
- crypto_hdr[5] = pn >> 24;
- crypto_hdr[6] = pn >> 32;
- crypto_hdr[7] = pn >> 40;
- break;
- }
- }
+ ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+ fast_tx->key, skb);
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -3294,12 +3383,21 @@
struct sk_buff *skb = NULL;
struct fq *fq = &local->fq;
struct fq_tin *tin = &txqi->tin;
+ struct ieee80211_tx_info *info;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
spin_lock_bh(&fq->lock);
if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
goto out;
+ /* Make sure fragments stay together. */
+ skb = __skb_dequeue(&txqi->frags);
+ if (skb)
+ goto out;
+
+begin:
skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
if (!skb)
goto out;
@@ -3307,16 +3405,46 @@
ieee80211_set_skb_vif(skb, txqi);
hdr = (struct ieee80211_hdr *)skb->data;
- if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
+ info = IEEE80211_SKB_CB(skb);
+
+ memset(&tx, 0, sizeof(tx));
+ __skb_queue_head_init(&tx.skbs);
+ tx.local = local;
+ tx.skb = skb;
+ tx.sdata = vif_to_sdata(info->control.vif);
+
+ if (txq->sta)
+ tx.sta = container_of(txq->sta, struct sta_info, sta);
+
+ /*
+ * The key can be removed while the packet was queued, so need to call
+ * this here to get the current key.
+ */
+ r = ieee80211_tx_h_select_key(&tx);
+ if (r != TX_CONTINUE) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+
+ if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
struct sta_info *sta = container_of(txq->sta, struct sta_info,
sta);
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ u8 pn_offs = 0;
- hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
- if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
- info->flags |= IEEE80211_TX_CTL_AMPDU;
- else
- info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+ if (tx.key &&
+ (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
+ pn_offs = ieee80211_hdrlen(hdr->frame_control);
+
+ ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+ tx.key, skb);
+ } else {
+ if (invoke_tx_handlers_late(&tx))
+ goto begin;
+
+ skb = __skb_dequeue(&tx.skbs);
+
+ if (!skb_queue_empty(&tx.skbs))
+ skb_queue_splice_tail(&tx.skbs, &txqi->frags);
}
out:
@@ -3354,7 +3482,7 @@
fast_tx = rcu_dereference(sta->fast_tx);
if (fast_tx &&
- ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+ ieee80211_xmit_fast(sdata, sta, fast_tx, skb))
goto out;
}