High CPU usage by [ksoftirqd/0] but very low network load

Hi all,

I am experiencing high CPU usage by [ksoftirqd/0] and STAs are being kicked off the network.

Only 3 to 4 STAs are connected, and usage is just regular traffic: Facebook, YouTube, Slack and email.

This AP is NOT doing NAT; it's being used as a dumb AP, which is connected to a Linux router.

As you can see: sirq is 100% and [ksoftirqd/0] is 77%.

Mem: 129344K used, 124612K free, 752K shrd, 3960K buff, 12696K cached
CPU:   0% usr   0% sys   0% nic   0% idle   0% io   0% irq 100% sirq
Load average: 1.26 1.00 0.73 2/40 12835
  PID  PPID USER     STAT   VSZ %VSZ %CPU COMMAND
    7     2 root     RW       0   0%  77% [ksoftirqd/0]
 1558     1 root     S     4408   2%   8% /usr/sbin/hostapd -s -P /var/run/wifi-phy0.pid -B /var/run/ho
12835 12811 root     R     1200   0%   8% top -d2
12786     2 root     IW       0   0%   8% [kworker/0:0]
 1543     1 root     S     4364   2%   0% /usr/sbin/hostapd -s -P /var/run/wifi-phy1.pid -B /var/run/ho
  932     1 root     S     3684   1%   0% /usr/sbin/sshd -D
12809   932 root     S     3676   1%   0% sshd: root@pts/0
  879     1 root     S     1760   1%   0% /sbin/netifd
    1     0 root     S     1556   1%   0% /sbin/procd
  822     1 root     S     1280   1%   0% /sbin/logd -S 64
  482     1 root     S     1220   0%   0% /sbin/ubusd
12811 12809 root     S     1212   0%   0% -ash
 1201     1 root     S<    1204   0%   0% /usr/sbin/ntpd -n -N -S /usr/sbin/ntpd-hotplug -p 0.openwrt.p
 1161   879 root     S     1200   0%   0% udhcpc -p /var/run/udhcpc-br-lan.pid -s /lib/netifd/dhcp.scri
  500     1 root     S     1016   0%   0% /sbin/urngd
  483     1 root     S      912   0%   0% /sbin/askfirst /usr/libexec/login.sh
11400     2 root     IW       0   0%   0% [kworker/u2:2]
12766     2 root     IW       0   0%   0% [kworker/u2:1]
  196     2 root     SW       0   0%   0% [spi0]
  432     2 root     SWN      0   0%   0% [jffs2_gcd_mtd5]
  581     2 root     IW<      0   0%   0% [ath10k_wq]
  568     2 root     IW<      0   0%   0% [cfg80211]
  582     2 root     IW<      0   0%   0% [ath10k_aux_wq]
  366     2 root     IW<      0   0%   0% [kworker/0:1H]
   91     2 root     IW<      0   0%   0% [kblockd]
   89     2 root     IW<      0   0%   0% [crypto]
  127     2 root     SW       0   0%   0% [kswapd0]
  340     2 root     IW<      0   0%   0% [ipv6_addrconf]
   42     2 root     SW       0   0%   0% [oom_reaper]
    8     2 root     IW<      0   0%   0% [netns]
   86     2 root     IW<      0   0%   0% [writeback]
    6     2 root     IW<      0   0%   0% [mm_percpu_wq]
    2     0 root     SW       0   0%   0% [kthreadd]
    4     2 root     IW<      0   0%   0% [kworker/0:0H]
   88     2 root     SW       0   0%   0% [kcompactd0]
  359     2 root     IW<      0   0%   0% [dsa_ordered]
  447     2 root     IW       0   0%   0% [kworker/0:2]

cat /proc/interrupts

           CPU0
  4:         39      MIPS   4  eth0
  5:  213867404      MIPS   5  eth1
  7:  338230957      MIPS   7  timer
 11:         26      MISC   3  ttyS0
 40:  687755401  AR724X PCI       ath10k_pci
 47:  475677693     dummy      ath9k
 48:          0     dummy      ehci_hcd:usb1
 49:          0     dummy      ehci_hcd:usb2
ERR:     240281

OpenWRT info

version r10316-8bdf50e9af
compiled from snapshot
DISTRIB_REVISION='78e5018'
DISTRIB_TARGET='ar71xx/generic'
DISTRIB_ARCH='mips_24kc'
Compiled on = 2019-06-25

/etc/config/firewall


config defaults
    option syn_flood '1'
    option input 'ACCEPT'
    option output 'ACCEPT'
    option forward 'REJECT'

config zone
    option name 'lan'
    list   network 'lan'
    option input 'ACCEPT'
    option output 'ACCEPT'
    option forward 'ACCEPT'

config include
    option path '/etc/firewall.user'

/etc/config/network

config interface 'loopback'
	option ifname 'lo'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'

config globals 'globals'
	option ula_prefix 'fd11:bfbc:c18c::/48'

config interface 'service'
	option ifname 'eth0.1'
	option proto 'static'
	option ipaddr '192.168.100.1'
	option netmask '255.255.255.0'

config interface 'lan'
	option type 'bridge'
	option ifname 'eth1.2'
	option proto 'dhcp'

config interface 'private'
	option auto '1'
	option proto 'none'
	option type 'bridge'
	option ifname 'eth1.3'

config interface 'guest'
	option auto '1'
	option proto 'none'
	option type 'bridge'
	option ifname 'eth1.4'

config switch
	option name 'switch0'
	option reset '1'
	option enable_vlan '1'

config switch_vlan
	option device 'switch0'
	option vlan '1'
	option ports '2 0t'

config switch_vlan
	option device 'switch0'
	option vlan '2'
	option ports '1t 6t'

config switch_vlan
	option device 'switch0'
	option vlan '3'
	option ports '1t 6t'

config switch_vlan
	option device 'switch0'
	option vlan '4'
	option ports '1t 6t'

/etc/config/wireless

config wifi-device 'radio0'
	option type 'mac80211'
	option path 'pci0000:00/0000:00:00.0'
	option country 'US'
	option hwmode '11a'
	option htmode 'VHT40'
	option channel '36'
	option txpower '13'
	option basic_rate '12000 18000 24000 36000 48000 54000'
	option supported_rates '12000 18000 24000 36000 48000 54000'
	option log_level '3'

config wifi-device 'radio1'
	option type 'mac80211'
	option path 'platform/qca955x_wmac'
	option country 'US'
	option hwmode '11g'
	option htmode 'HT20'
	option channel '11'
	option txpower '13'
	option basic_rate '12000 18000 24000 36000 48000 54000'
	option supported_rates '12000 18000 24000 36000 48000 54000'
	option log_level '3'

config wifi-iface 'wlan0_ap'
	option mode 'ap'
	option device 'radio0'
	option network 'private'
	option ifname 'wlan0-ap'
	option ssid 'xxx'
	option encryption 'psk2'
	option key 'xxx'

config wifi-iface 'wlan1_ap'
	option mode 'ap'
	option device 'radio1'
	option network 'private'
	option ifname 'wlan1-ap'
	option ssid 'xxx'
	option encryption 'psk2'
	option key 'xxx'

It works well after a restart, for at least two weeks.

Please let me know if any other information is needed.

Thanks :slight_smile:

Just noticed that when I restart only the WiFi everything is back to normal. I didn't have to restart the AP.

ath10k-firmware-qca9984-ct - 2018-10-10-d366b80d-1
kmod-ath - 4.14.129+4.19.32-1-2
kmod-ath10k-ct - 4.14.129+2019-06-13-a045b1ce-1
kmod-ath9k - 4.14.129+4.19.32-1-2
kmod-ath9k-common - 4.14.129+4.19.32-1-2

Try replacing the -ct ath10k with the mainline ath10k. Replace both the kmod and the firmware.

2 Likes

Thanks @mk24. I compiled a version with mainline ath10k firmware and kmod.

How can I test / reproduce this problem?

Thanks

Watch the memory in use. I see from your run of top that more than 128 MB of RAM was in use. This should not be the case with a dumb AP. Something in ath10k-ct causes the kernel to allocate huge (for a wifi driver) amounts of RAM, which continues to increase over a period of time. That may be related to other instability -- or not.

1 Like

ath10k-ct is memory hungry; with only a single ath10k-ct interface, 128 MB should be sufficient (with two, it wouldn't be anymore), but only barely - ath10k has some mitigations to reduce buffers, whereas ath10k-ct does not.

Memory on my device is 256MB.

Got it. I will test it for a few days and report back.

Thanks!

Having an issue with mainline ath10k firmware and kmod

As soon as I connect a STA to 5GHz (ath10k), the AP restarts. All I see in the log file is:

AP has QCA9984 and I am using the same config files as above.

Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 IEEE 802.11: authentication OK (open system)
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 MLME: MLME-AUTHENTICATE.indication(04:f0:21:48:ae:d0, OPEN_SYSTEM)
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 MLME: MLME-DELETEKEYS.request(04:f0:21:48:ae:d0)
Wed Aug  7 02:02:35 2019 daemon.info hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 IEEE 802.11: authenticated
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 IEEE 802.11: association OK (aid 1)
Wed Aug  7 02:02:35 2019 daemon.info hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 IEEE 802.11: associated (aid 1)
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 MLME: MLME-ASSOCIATE.indication(04:f0:21:48:ae:d0)
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 MLME: MLME-DELETEKEYS.request(04:f0:21:48:ae:d0)
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 IEEE 802.11: binding station to interface 'wlan0-ap'
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 WPA: event 1 notification
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 WPA: start authentication
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 IEEE 802.1X: unauthorizing port
Wed Aug  7 02:02:35 2019 daemon.debug hostapd: wlan0-ap: STA 04:f0:21:48:ae:d0 WPA: sending 1/4 msg of 4-Way Handshake

This does NOT happen when connecting to 2.4GHz (ath9k), or when using the ath10k-ct firmware and kmod.

kernel - 4.14.134-1-233e5e02ef5e2b2e5ffd578903c35ca8
ath10k-firmware-qca9984 - 20190618-1
kmod-ath - 4.14.134+5.2-rc7-1-1
kmod-ath10k - 4.14.134+5.2-rc7-1-1
kmod-ath9k - 4.14.134+5.2-rc7-1-1
kmod-ath9k-common - 4.14.134+5.2-rc7-1-1

Should I move this to a new thread under "For Developers"?

Thanks

Still having the same issue:
sirq is 100% and devices are not able to connect.

Mem: 103432K used, 150524K free, 56K shrd, 3968K buff, 10752K cached
CPU:   0% usr   0% sys   0% nic   0% idle   0% io   0% irq 100% sirq
Load average: 1.03 0.71 0.31 3/41 2622
  PID  PPID USER     STAT   VSZ %VSZ %CPU COMMAND
    7     2 root     RW       0   0%  89% [ksoftirqd/0]
 2592     2 root     IW       0   0%   9% [kworker/0:1]
 2612   928 root     S     3684   1%   1% sshd: root@pts/0
 2622  2614 root     R     1208   0%   1% top -d2
    5     2 root     IW       0   0%   0% [kworker/u2:0]
 2431     1 root     S     4436   2%   0% /usr/sbin/hostapd -s -P /var/run/wifi-phy0.pid -B /var/run/hosta
  928     1 root     S     3692   1%   0% /usr/sbin/sshd -D
  875     1 root     S     1768   1%   0% /sbin/netifd
    1     0 root     S     1564   1%   0% /sbin/procd
  807     1 root     S     1244   0%   0% /sbin/logd -S 64
  482     1 root     S     1220   0%   0% /sbin/ubusd
 2614  2612 root     S     1220   0%   0% -ash
 1160     1 root     S<    1212   0%   0% /usr/sbin/ntpd -n -N -S /usr/sbin/ntpd-hotplug -p 0.openwrt.pool
 1126   875 root     S     1208   0%   0% udhcpc -p /var/run/udhcpc-eth1.2.pid -s /lib/netifd/dhcp.script
  500     1 root     S     1024   0%   0% /sbin/urngd
  483     1 root     S      920   0%   0% /sbin/askfirst /usr/libexec/login.sh
 2003     2 root     IW       0   0%   0% [kworker/u2:2]
 2226     2 root     IW       0   0%   0% [kworker/0:2]
  196     2 root     SW       0   0%   0% [spi0]
  432     2 root     SWN      0   0%   0% [jffs2_gcd_mtd5]
  568     2 root     IW<      0   0%   0% [cfg80211]
  581     2 root     IW<      0   0%   0% [ath10k_wq]
  366     2 root     IW<      0   0%   0% [kworker/0:1H]
  359     2 root     IW<      0   0%   0% [dsa_ordered]
  582     2 root     IW<      0   0%   0% [ath10k_aux_wq]
  127     2 root     SW       0   0%   0% [kswapd0]
   89     2 root     IW<      0   0%   0% [crypto]
   88     2 root     SW       0   0%   0% [kcompactd0]
   91     2 root     IW<      0   0%   0% [kblockd]
    8     2 root     IW<      0   0%   0% [netns]
   42     2 root     SW       0   0%   0% [oom_reaper]
    2     0 root     SW       0   0%   0% [kthreadd]
    4     2 root     IW<      0   0%   0% [kworker/0:0H]
    6     2 root     IW<      0   0%   0% [mm_percpu_wq]
   86     2 root     IW<      0   0%   0% [writeback]
  340     2 root     IW<      0   0%   0% [ipv6_addrconf]
 2608     2 root     IW       0   0%   0% [kworker/0:0]
  609     2 root     IW       0   0%   0% [kworker/u2:1]

This happens at random times. I am using:

ath10k-firmware-qca9984-ct - 2019-06-28-7651f5bb-1
kmod-ath - 4.14.134+5.2-rc7-1-1
kmod-ath10k-ct - 4.14.134+2019-06-13-f0aa8130-1
kmod-ath9k - 4.14.134+5.2-rc7-1-1
kmod-ath9k-common - 4.14.134+5.2-rc7-1-1

any advice? thanks!

What device are you using? There may be others that have seen (and possibly resolved) the issue on the same hardware.

Chipset: SoC QCA9558, QCA9984, switch QCA8337

Did anyone ever get to the bottom of this? I'm experiencing periods of high sirq% with:

    7     2 root     SW       0   0%   2% [ksoftirqd/0]
  378     2 root     IW       0   0%   2% [kworker/0:2]

at the top of my CPU users.

I'm using 19.07.2 on a WNDR 4300. I never had this problem with 18.06.x.

Any further info I can provide to help get to the bottom of it?

1 Like