mac80211: add improved HW queue control

mac80211 currently only supports one hardware queue
per AC. This is already problematic for off-channel
uses since if we go off channel while the BE queue
is full and then try to send an off-channel frame
the frame will never go out. This will become worse
when we support multi-channel since then a queue on
one channel might be full, but we have to stop the
software queue for all channels. That is obviously
not desirable.

To address this problem allow drivers to register
more hardware queues, and allow them to map them to
virtual interfaces. When they stop a hardware queue
the corresponding AC software queues on the correct
interfaces will be stopped as well. Additionally,
there's an off-channel queue to solve that problem
and a per-interface after-DTIM beacon queue. This
allows drivers to manage software queues closer to
how the hardware works.

Currently, there's a limit of 16 hardware queues.
This may or may not be sufficient, we can adjust it
as needed.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2956a20..ec4995d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -95,9 +95,11 @@
  * @IEEE80211_MAX_QUEUES: Maximum number of regular device queues.
  */
 enum ieee80211_max_queues {
-	IEEE80211_MAX_QUEUES =		4,
+	IEEE80211_MAX_QUEUES =		16,
 };
 
+#define IEEE80211_INVAL_HW_QUEUE	0xff
+
 /**
  * enum ieee80211_ac_numbers - AC numbers as used in mac80211
  * @IEEE80211_AC_VO: voice
@@ -522,7 +524,7 @@
  *
  * @flags: transmit info flags, defined above
  * @band: the band to transmit on (use for checking for races)
- * @reserved: reserved for future use
+ * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
  * @ack_frame_id: internal frame ID for TX status, used internally
  * @control: union for control data
  * @status: union for status data
@@ -538,7 +540,7 @@
 	u32 flags;
 	u8 band;
 
-	u8 reserved;
+	u8 hw_queue;
 
 	u16 ack_frame_id;
 
@@ -889,6 +891,8 @@
  *	these need to be set (or cleared) when the interface is added
  *	or, if supported by the driver, the interface type is changed
  *	at runtime, mac80211 will never touch this field
+ * @hw_queue: hardware queue for each AC
+ * @cab_queue: content-after-beacon (DTIM beacon really) queue, AP mode only
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *).
  */
@@ -897,7 +901,12 @@
 	struct ieee80211_bss_conf bss_conf;
 	u8 addr[ETH_ALEN];
 	bool p2p;
+
+	u8 cab_queue;
+	u8 hw_queue[IEEE80211_NUM_ACS];
+
 	u32 driver_flags;
+
 	/* must be last */
 	u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *))));
 };
@@ -1179,6 +1188,11 @@
  * @IEEE80211_HW_WANT_MONITOR_VIF: The driver would like to be informed of
  *	a virtual monitor interface when monitor interfaces are the only
  *	active interfaces.
+ *
+ * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface
+ *	queue mapping in order to use different queues (not just one per AC)
+ *	for different virtual interfaces. See the doc section on HW queue
+ *	control for more details.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -1201,7 +1215,7 @@
 	IEEE80211_HW_SUPPORTS_UAPSD			= 1<<17,
 	IEEE80211_HW_REPORTS_TX_ACK_STATUS		= 1<<18,
 	IEEE80211_HW_CONNECTION_MONITOR			= 1<<19,
-	/* reuse bit 20 */
+	IEEE80211_HW_QUEUE_CONTROL			= 1<<20,
 	IEEE80211_HW_SUPPORTS_PER_STA_GTK		= 1<<21,
 	IEEE80211_HW_AP_LINK_PS				= 1<<22,
 	IEEE80211_HW_TX_AMPDU_SETUP_IN_HW		= 1<<23,
@@ -1271,6 +1285,9 @@
  * @max_tx_aggregation_subframes: maximum number of subframes in an
  *	aggregate an HT driver will transmit, used by the peer as a
  *	hint to size its reorder buffer.
+ *
+ * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
+ *	(if %IEEE80211_HW_QUEUE_CONTROL is set)
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -1291,6 +1308,7 @@
 	u8 max_rate_tries;
 	u8 max_rx_aggregation_subframes;
 	u8 max_tx_aggregation_subframes;
+	u8 offchannel_tx_hw_queue;
 };
 
 /**
@@ -1699,6 +1717,61 @@
  */
 
 /**
+ * DOC: HW queue control
+ *
+ * Before HW queue control was introduced, mac80211 only had a single static
+ * assignment of per-interface AC software queues to hardware queues. This
+ * was problematic for a few reasons:
+ * 1) off-channel transmissions might get stuck behind other frames
+ * 2) multiple virtual interfaces couldn't be handled correctly
+ * 3) after-DTIM frames could get stuck behind other frames
+ *
+ * To solve this, hardware typically uses multiple different queues for all
+ * the different usages, and this needs to be propagated into mac80211 so it
+ * won't have the same problem with the software queues.
+ *
+ * Therefore, mac80211 now offers the %IEEE80211_HW_QUEUE_CONTROL capability
+ * flag that tells it that the driver implements its own queue control. To do
+ * so, the driver will set up the various queues in each &struct ieee80211_vif
+ * and the offchannel queue in &struct ieee80211_hw. In response, mac80211 will
+ * use those queue IDs in the hw_queue field of &struct ieee80211_tx_info and
+ * if necessary will queue the frame on the right software queue that mirrors
+ * the hardware queue.
+ * Additionally, the driver has to then use these HW queue IDs for the queue
+ * management functions (ieee80211_stop_queue() et al.)
+ *
+ * The driver is free to set up the queue mappings as needed, multiple virtual
+ * interfaces may map to the same hardware queues if needed. The setup has to
+ * happen during add_interface or change_interface callbacks. For example, a
+ * driver supporting station+station and station+AP modes might decide to have
+ * 10 hardware queues to handle different scenarios:
+ *
+ * 4 AC HW queues for 1st vif: 0, 1, 2, 3
+ * 4 AC HW queues for 2nd vif: 4, 5, 6, 7
+ * after-DTIM queue for AP:   8
+ * off-channel queue:         9
+ *
+ * It would then set up the hardware like this:
+ *   hw.offchannel_tx_hw_queue = 9
+ *
+ * and the first virtual interface that is added as follows:
+ *   vif.hw_queue[IEEE80211_AC_VO] = 0
+ *   vif.hw_queue[IEEE80211_AC_VI] = 1
+ *   vif.hw_queue[IEEE80211_AC_BE] = 2
+ *   vif.hw_queue[IEEE80211_AC_BK] = 3
+ *   vif.cab_queue = 8 // if AP mode, otherwise %IEEE80211_INVAL_HW_QUEUE
+ * and the second virtual interface with 4-7.
+ *
+ * If queue 6 gets full, for example, mac80211 would only stop the second
+ * virtual interface's BE queue since virtual interface queues are per AC.
+ *
+ * Note that the vif.cab_queue value should be set to %IEEE80211_INVAL_HW_QUEUE
+ * whenever the queue is not used (i.e. the interface is not in AP mode) if the
+ * queue could potentially be shared since mac80211 will look at cab_queue when
+ * a queue is stopped/woken even if the interface is not in AP mode.
+ */
+
+/**
  * enum ieee80211_filter_flags - hardware filter flags
  *
  * These flags determine what the filter in hardware should be
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9628a18..5b7053c 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -286,25 +286,25 @@
  * a global "agg_queue_stop" refcount.
  */
 static void __acquires(agg_queue)
-ieee80211_stop_queue_agg(struct ieee80211_local *local, int tid)
+ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid)
 {
-	int queue = ieee80211_ac_from_tid(tid);
+	int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)];
 
-	if (atomic_inc_return(&local->agg_queue_stop[queue]) == 1)
+	if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1)
 		ieee80211_stop_queue_by_reason(
-			&local->hw, queue,
+			&sdata->local->hw, queue,
 			IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
 	__acquire(agg_queue);
 }
 
 static void __releases(agg_queue)
-ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid)
+ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid)
 {
-	int queue = ieee80211_ac_from_tid(tid);
+	int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)];
 
-	if (atomic_dec_return(&local->agg_queue_stop[queue]) == 0)
+	if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0)
 		ieee80211_wake_queue_by_reason(
-			&local->hw, queue,
+			&sdata->local->hw, queue,
 			IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
 	__release(agg_queue);
 }
@@ -314,13 +314,14 @@
  * requires a call to ieee80211_agg_splice_finish later
  */
 static void __acquires(agg_queue)
-ieee80211_agg_splice_packets(struct ieee80211_local *local,
+ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata,
 			     struct tid_ampdu_tx *tid_tx, u16 tid)
 {
-	int queue = ieee80211_ac_from_tid(tid);
+	struct ieee80211_local *local = sdata->local;
+	int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)];
 	unsigned long flags;
 
-	ieee80211_stop_queue_agg(local, tid);
+	ieee80211_stop_queue_agg(sdata, tid);
 
 	if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates"
 			  " from the pending queue\n", tid))
@@ -336,9 +337,9 @@
 }
 
 static void __releases(agg_queue)
-ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid)
+ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid)
 {
-	ieee80211_wake_queue_agg(local, tid);
+	ieee80211_wake_queue_agg(sdata, tid);
 }
 
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
@@ -376,9 +377,9 @@
 					" tid %d\n", tid);
 #endif
 		spin_lock_bh(&sta->lock);
-		ieee80211_agg_splice_packets(local, tid_tx, tid);
+		ieee80211_agg_splice_packets(sdata, tid_tx, tid);
 		ieee80211_assign_tid_tx(sta, tid, NULL);
-		ieee80211_agg_splice_finish(local, tid);
+		ieee80211_agg_splice_finish(sdata, tid);
 		spin_unlock_bh(&sta->lock);
 
 		kfree_rcu(tid_tx, rcu_head);
@@ -598,14 +599,14 @@
 	 */
 	spin_lock_bh(&sta->lock);
 
-	ieee80211_agg_splice_packets(local, tid_tx, tid);
+	ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid);
 	/*
 	 * Now mark as operational. This will be visible
 	 * in the TX path, and lets it go lock-free in
 	 * the common case.
 	 */
 	set_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
-	ieee80211_agg_splice_finish(local, tid);
+	ieee80211_agg_splice_finish(sta->sdata, tid);
 
 	spin_unlock_bh(&sta->lock);
 }
@@ -790,12 +791,12 @@
 	 * more.
 	 */
 
-	ieee80211_agg_splice_packets(local, tid_tx, tid);
+	ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid);
 
 	/* future packets must not find the tid_tx struct any more */
 	ieee80211_assign_tid_tx(sta, tid, NULL);
 
-	ieee80211_agg_splice_finish(local, tid);
+	ieee80211_agg_splice_finish(sta->sdata, tid);
 
 	kfree_rcu(tid_tx, rcu_head);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 667d939..d6163b9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2106,6 +2106,10 @@
 
 	IEEE80211_SKB_CB(skb)->flags = flags;
 
+	if (flags & IEEE80211_TX_CTL_TX_OFFCHAN)
+		IEEE80211_SKB_CB(skb)->hw_queue =
+			local->hw.offchannel_tx_hw_queue;
+
 	skb->dev = sdata->dev;
 
 	*cookie = (unsigned long) skb;
@@ -2147,6 +2151,8 @@
 		/* modify cookie to prevent API mismatches */
 		*cookie ^= 2;
 		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN;
+		IEEE80211_SKB_CB(skb)->hw_queue =
+			local->hw.offchannel_tx_hw_queue;
 		local->hw_roc_skb = skb;
 		local->hw_roc_skb_for_status = skb;
 		mutex_unlock(&local->mtx);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8ed074f..4be11ea 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1449,6 +1449,7 @@
 				    enum queue_stop_reason reason);
 void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
 				    enum queue_stop_reason reason);
+void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue);
 void ieee80211_add_pending_skb(struct ieee80211_local *local,
 			       struct sk_buff *skb);
 void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 2b88cb2..ed29764 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -149,6 +149,34 @@
 	return 0;
 }
 
+static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
+{
+	int n_queues = sdata->local->hw.queues;
+	int i;
+
+	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+		if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
+				 IEEE80211_INVAL_HW_QUEUE))
+			return -EINVAL;
+		if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >=
+				 n_queues))
+			return -EINVAL;
+	}
+
+	if (sdata->vif.type != NL80211_IFTYPE_AP) {
+		sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
+		return 0;
+	}
+
+	if (WARN_ON_ONCE(sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(sdata->vif.cab_queue >= n_queues))
+		return -EINVAL;
+
+	return 0;
+}
+
 void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
 				    const int offset)
 {
@@ -169,6 +197,20 @@
 #undef ADJUST
 }
 
+static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	int i;
+
+	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+		if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+			sdata->vif.hw_queue[i] = IEEE80211_INVAL_HW_QUEUE;
+		else
+			sdata->vif.hw_queue[i] = i;
+	}
+	sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
+}
+
 static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
@@ -190,6 +232,8 @@
 	snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
 		 wiphy_name(local->hw.wiphy));
 
+	ieee80211_set_default_queues(sdata);
+
 	ret = drv_add_interface(local, sdata);
 	if (WARN_ON(ret)) {
 		/* ok .. stupid driver, it asked for this! */
@@ -197,6 +241,12 @@
 		return ret;
 	}
 
+	ret = ieee80211_check_queues(sdata);
+	if (ret) {
+		kfree(sdata);
+		return ret;
+	}
+
 	rcu_assign_pointer(local->monitor_sdata, sdata);
 
 	return 0;
@@ -344,6 +394,9 @@
 			res = drv_add_interface(local, sdata);
 			if (res)
 				goto err_stop;
+			res = ieee80211_check_queues(sdata);
+			if (res)
+				goto err_del_interface;
 		}
 
 		if (sdata->vif.type == NL80211_IFTYPE_AP) {
@@ -1040,6 +1093,13 @@
 	if (ret)
 		type = sdata->vif.type;
 
+	/*
+	 * Ignore return value here, there's not much we can do since
+	 * the driver changed the interface type internally already.
+	 * The warnings will hopefully make driver authors fix it :-)
+	 */
+	ieee80211_check_queues(sdata);
+
 	ieee80211_setup_sdata(sdata, type);
 
 	err = ieee80211_do_open(sdata->dev, false);
@@ -1266,6 +1326,8 @@
 			       sizeof(sdata->rc_rateidx_mcs_mask[i]));
 	}
 
+	ieee80211_set_default_queues(sdata);
+
 	/* setup type-dependent data */
 	ieee80211_setup_sdata(sdata, type);
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d019f0d..ac79d5e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -591,6 +591,7 @@
 	local->hw.max_report_rates = 0;
 	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
 	local->user_power_level = -1;
@@ -687,6 +688,11 @@
 		WLAN_CIPHER_SUITE_AES_CMAC
 	};
 
+	if (hw->flags & IEEE80211_HW_QUEUE_CONTROL &&
+	    (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE ||
+	     local->hw.offchannel_tx_hw_queue >= local->hw.queues))
+		return -EINVAL;
+
 	if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns)
 #ifdef CONFIG_PM
 	    && (!local->ops->suspend || !local->ops->resume)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index a8d0188..4f6aac1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -400,6 +400,8 @@
 		return TX_CONTINUE;
 
 	info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
+	if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+		info->hw_queue = tx->sdata->vif.cab_queue;
 
 	/* device releases frame after DTIM beacon */
 	if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING))
@@ -1214,11 +1216,19 @@
 			       bool txpending)
 {
 	struct sk_buff *skb, *tmp;
-	struct ieee80211_tx_info *info;
 	unsigned long flags;
 
 	skb_queue_walk_safe(skbs, skb, tmp) {
-		int q = skb_get_queue_mapping(skb);
+		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+		int q = info->hw_queue;
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		if (WARN_ON_ONCE(q >= local->hw.queues)) {
+			__skb_unlink(skb, skbs);
+			dev_kfree_skb(skb);
+			continue;
+		}
+#endif
 
 		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 		if (local->queue_stop_reasons[q] ||
@@ -1240,7 +1250,6 @@
 		}
 		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
-		info = IEEE80211_SKB_CB(skb);
 		info->control.vif = vif;
 		info->control.sta = sta;
 
@@ -1284,9 +1293,14 @@
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_MONITOR:
 		sdata = rcu_dereference(local->monitor_sdata);
-		if (sdata)
+		if (sdata) {
 			vif = &sdata->vif;
-		else
+			info->hw_queue =
+				vif->hw_queue[skb_get_queue_mapping(skb)];
+		} else if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+			dev_kfree_skb(skb);
+			return true;
+		} else
 			vif = NULL;
 		break;
 	case NL80211_IFTYPE_AP_VLAN:
@@ -1402,6 +1416,12 @@
 	tx.channel = local->hw.conf.channel;
 	info->band = tx.channel->band;
 
+	/* set up hw_queue value early */
+	if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) ||
+	    !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL))
+		info->hw_queue =
+			sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
 	if (!invoke_tx_handlers(&tx))
 		result = __ieee80211_tx(local, &tx.skbs, led_len,
 					tx.sta, txpending);
@@ -2172,7 +2192,6 @@
 void ieee80211_tx_pending(unsigned long data)
 {
 	struct ieee80211_local *local = (struct ieee80211_local *)data;
-	struct ieee80211_sub_if_data *sdata;
 	unsigned long flags;
 	int i;
 	bool txok;
@@ -2209,8 +2228,7 @@
 		}
 
 		if (skb_queue_empty(&local->pending[i]))
-			list_for_each_entry_rcu(sdata, &local->interfaces, list)
-				netif_wake_subqueue(sdata->dev, i);
+			ieee80211_propagate_queue_wake(local, i);
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
@@ -2717,11 +2735,13 @@
 void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
 			  struct sk_buff *skb, int tid)
 {
+	int ac = ieee802_1d_to_ac[tid];
+
 	skb_set_mac_header(skb, 0);
 	skb_set_network_header(skb, 0);
 	skb_set_transport_header(skb, 0);
 
-	skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]);
+	skb_set_queue_mapping(skb, ac);
 	skb->priority = tid;
 
 	/*
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9e8f4b8..e67fe5c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -265,11 +265,36 @@
 }
 EXPORT_SYMBOL(ieee80211_ctstoself_duration);
 
+void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		int ac;
+
+		if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
+			continue;
+
+		if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE &&
+		    local->queue_stop_reasons[sdata->vif.cab_queue] != 0)
+			continue;
+
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			int ac_queue = sdata->vif.hw_queue[ac];
+
+			if (ac_queue == queue ||
+			    (sdata->vif.cab_queue == queue &&
+			     local->queue_stop_reasons[ac_queue] == 0 &&
+			     skb_queue_empty(&local->pending[ac_queue])))
+				netif_wake_subqueue(sdata->dev, ac);
+		}
+	}
+}
+
 static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
 				   enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata;
 
 	trace_wake_queue(local, queue, reason);
 
@@ -287,11 +312,7 @@
 
 	if (skb_queue_empty(&local->pending[queue])) {
 		rcu_read_lock();
-		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-			if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
-				continue;
-			netif_wake_subqueue(sdata->dev, queue);
-		}
+		ieee80211_propagate_queue_wake(local, queue);
 		rcu_read_unlock();
 	} else
 		tasklet_schedule(&local->tx_pending_tasklet);
@@ -332,8 +353,15 @@
 	__set_bit(reason, &local->queue_stop_reasons[queue]);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list)
-		netif_stop_subqueue(sdata->dev, queue);
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		int ac;
+
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			if (sdata->vif.hw_queue[ac] == queue ||
+			    sdata->vif.cab_queue == queue)
+				netif_stop_subqueue(sdata->dev, ac);
+		}
+	}
 	rcu_read_unlock();
 }
 
@@ -360,8 +388,8 @@
 {
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long flags;
-	int queue = skb_get_queue_mapping(skb);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	int queue = info->hw_queue;
 
 	if (WARN_ON(!info->control.vif)) {
 		kfree_skb(skb);
@@ -393,7 +421,7 @@
 			continue;
 		}
 
-		queue = skb_get_queue_mapping(skb);
+		queue = info->hw_queue;
 
 		__ieee80211_stop_queue(hw, queue,
 				IEEE80211_QUEUE_STOP_REASON_SKB_ADD);