Troubleshooting OpenWRT

Hi, I have a couple of recurrent problems with the TP-Link Archer C7 v2 router running v22.03.3 (and previous versions): firstly, the wifi connection degrades over a period of a couple of weeks, becoming slower and more reluctant, until restarting the wireless device(s) fixes it; secondly, the wireless occasionally fails completely, requiring a wired connection to reset the wifi again.

I look at the system and kernel logs but never see any obvious errors, though I don't know what's normal; for example, "Switch own primary and secondary channel to get secondary channel with no Beacons from other BSSes". How can I find out more about what's causing these problems and separate the wheat from the (disassociated/deauthenticated) chaff?

Let’s start by looking at your config.

Please connect to your OpenWrt device using ssh and copy the output of the following commands and post it here using the "Preformatted text </> " button:
grafik
Remember to redact passwords, MAC addresses and any public IP addresses you may have:

ubus call system board
cat /etc/config/network
cat /etc/config/wireless
cat /etc/config/dhcp
cat /etc/config/firewall

Thanks for the quick reply. Here you go:

router:~# ubus call system board

{
	"kernel": "5.10.161",
	"hostname": "router",
	"system": "Qualcomm Atheros QCA9558 ver 1 rev 0",
	"model": "TP-Link Archer C7 v2",
	"board_name": "tplink,archer-c7-v2",
	"rootfs_type": "squashfs",
	"release": {
		"distribution": "OpenWrt",
		"version": "22.03.3",
		"revision": "r20028-43d71ad93e",
		"target": "ath79/generic",
		"description": "OpenWrt 22.03.3 r20028-43d71ad93e"
	}
}

router:~# cat /etc/config/network

config interface 'loopback'
	option device 'lo'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'

config globals 'globals'
	option ula_prefix 'fda9:483a:76cd::/48'

config device
	option name 'br-lan'
	option type 'bridge'
	list ports 'eth1.1'

config interface 'lan'
	option device 'br-lan'
	option proto 'static'
	option ipaddr 'x.x.x.x'
	option netmask '255.255.255.0'
	option ip6assign '60'
	option ipv6 'off'

config interface 'wan'
	option device 'eth0.2'
	option proto 'dhcp'
	option ipv6 'off'
	option peerdns '0'
	list dns '8.8.8.8'
	list dns '8.8.4.4'

config interface 'wan6'
	option device 'eth0.2'
	option proto 'dhcpv6'

config switch
	option name 'switch0'
	option reset '1'
	option enable_vlan '1'

config switch_vlan
	option device 'switch0'
	option vlan '1'
	option ports '2 3 4 5 0t'

config switch_vlan
	option device 'switch0'
	option vlan '2'
	option ports '1 6t'

router:~# cat /etc/config/wireless

config wifi-device 'radio0'
	option type 'mac80211'
	option hwmode '11a'
	option path 'pci0000:00/0000:00:00.0'
	option country 'GB'
	option htmode 'VHT80'
	option disabled '0'
	option channel '56'
	option txpower '23'
	option cell_density '0'

config wifi-iface 'default_radio0'
	option device 'radio0'
	option network 'lan'
	option mode 'ap'
	option ssid 'MYSSID'
	option encryption 'psk2'
	option key 'mykey'

config wifi-device 'radio1'
	option type 'mac80211'
	option hwmode '11g'
	option path 'platform/ahb/18100000.wmac'
	option country 'GB'
	option disabled '0'
	option txpower '20'
	option cell_density '0'
	option htmode 'HT20'
	option channel '9'

config wifi-iface 'default_radio1'
	option device 'radio1'
	option network 'lan'
	option mode 'ap'
	option ssid 'MYSSID'
	option encryption 'psk2'
	option key 'mykey'

router:~# cat /etc/config/dhcp

config dnsmasq
	option domainneeded '1'
	option localise_queries '1'
	option rebind_protection '1'
	option rebind_localhost '1'
	option local '/lan/'
	option domain 'lan'
	option expandhosts '1'
	option authoritative '1'
	option readethers '1'
	option leasefile '/tmp/dhcp.leases'
	option resolvfile '/tmp/resolv.conf.d/resolv.conf.auto'
	option localservice '1'
	option ednspacket_max '1232'
	list rebind_domain 'plex.direct'

config dhcp 'lan'
	option interface 'lan'
	option start '127'
	option limit '150'
	option leasetime '12h'
	option dhcpv4 'server'
	option dhcpv6 'disabled'
	option ra 'server'
	option ra_slaac '1'
	list ra_flags 'managed-config'
	list ra_flags 'other-config'

config dhcp 'wan'
	option interface 'wan'
	option ignore '1'
	list ra_flags 'none'

config odhcpd 'odhcpd'
	option maindhcp '0'
	option leasefile '/tmp/hosts/odhcpd'
	option leasetrigger '/usr/sbin/odhcpd-update'
	option loglevel '4'

config host
	option mac 'XX:XX:XX:XX:XX:XX'
	option ip 'x.x.x.x'
	option name 'hostno1'
	option dns '1'

[lots more hosts]

router:~# cat /etc/config/firewall

config defaults
	option input 'ACCEPT'
	option output 'ACCEPT'
	option forward 'REJECT'
	option synflood_protect '1'
	option drop_invalid '1'

config zone
	option name 'lan'
	list network 'lan'
	option input 'ACCEPT'
	option output 'ACCEPT'
	option forward 'ACCEPT'

config zone
	option name 'wan'
	list network 'wan'
	list network 'wan6'
	option input 'REJECT'
	option output 'ACCEPT'
	option forward 'REJECT'
	option masq '1'
	option mtu_fix '1'

config forwarding
	option src 'lan'
	option dest 'wan'

config rule
	option name 'Allow-DHCP-Renew'
	option src 'wan'
	option proto 'udp'
	option dest_port '68'
	option target 'ACCEPT'
	option family 'ipv4'

config rule
	option name 'Allow-Ping'
	option src 'wan'
	option proto 'icmp'
	option icmp_type 'echo-request'
	option family 'ipv4'
	option target 'ACCEPT'

config rule
	option name 'Allow-IGMP'
	option src 'wan'
	option proto 'igmp'
	option family 'ipv4'
	option target 'ACCEPT'

config rule
	option name 'Allow-DHCPv6'
	option src 'wan'
	option proto 'udp'
	option src_ip 'fc00::/6'
	option dest_ip 'fc00::/6'
	option dest_port '546'
	option family 'ipv6'
	option target 'ACCEPT'
	option enabled '0'

config rule
	option name 'Allow-MLD'
	option src 'wan'
	option proto 'icmp'
	option src_ip 'fe80::/10'
	list icmp_type '130/0'
	list icmp_type '131/0'
	list icmp_type '132/0'
	list icmp_type '143/0'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-ICMPv6-Input'
	option src 'wan'
	option proto 'icmp'
	list icmp_type 'echo-request'
	list icmp_type 'echo-reply'
	list icmp_type 'destination-unreachable'
	list icmp_type 'packet-too-big'
	list icmp_type 'time-exceeded'
	list icmp_type 'bad-header'
	list icmp_type 'unknown-header-type'
	list icmp_type 'router-solicitation'
	list icmp_type 'neighbour-solicitation'
	list icmp_type 'router-advertisement'
	list icmp_type 'neighbour-advertisement'
	option limit '1000/sec'
	option family 'ipv6'
	option target 'ACCEPT'
	option enabled '0'

config rule
	option name 'Allow-ICMPv6-Forward'
	option src 'wan'
	option dest '*'
	option proto 'icmp'
	list icmp_type 'echo-request'
	list icmp_type 'echo-reply'
	list icmp_type 'destination-unreachable'
	list icmp_type 'packet-too-big'
	list icmp_type 'time-exceeded'
	list icmp_type 'bad-header'
	list icmp_type 'unknown-header-type'
	option limit '1000/sec'
	option family 'ipv6'
	option target 'ACCEPT'
	option enabled '0'

config rule
	option name 'Allow-IPSec-ESP'
	option src 'wan'
	option dest 'lan'
	option proto 'esp'
	option target 'ACCEPT'

config rule
	option name 'Allow-ISAKMP'
	option src 'wan'
	option dest 'lan'
	option dest_port '500'
	option proto 'udp'
	option target 'ACCEPT'

config rule
	option name 'Support-UDP-Traceroute'
	option src 'wan'
	option dest_port '33434:33689'
	option proto 'udp'
	option family 'ipv4'
	option target 'REJECT'
	option enabled '0'

config include
	option path '/etc/firewall.user'

router:~#

This is over-redacted. It is not necessary to redact the RFC1918 network addresses (i.e. 192.168.0.0/16, 172.16.0.0/12, 10.0.0.0/8).

Since your config isn't that far from defaults, I'd recommend upgrading to 23.05.0 and going back to defaults. Then just change the minimum things necessary (i.e. country code, SSID, encryption type, passphrase and any other key things) and then test again.

2 Likes

I'd still like some answers to these questions (the first at least should be trivial):

  • How to increase the logging level, either for specific modules or the whole system?
  • How to decide whether lines in the system and kernel logs are normal or errors?
  • How to find out where delays are happening?

As I said, the slowdown is a longstanding problem and before posting here, I'd read the notes for later releases and didn't see anything which might fix it. As far as I know, I have only made essential changes to the config and frankly was hoping for more than "install the latest version" without any further justification.

You say it's overredacted, I say I do not wish to expose my internal network config.

Basic info here:

You may need to do some searches if you want to learn how to increase the depth and granularity of the logging... some options may require recompiling from source. And be aware, heavy logging activity will likely slow down your system.

Usually, errors are quite clear with "Error" as part of the message. There are also warning messages, and then the vast majority of entries are just status (normal).

That's harder... the symptoms you describe are not common or normal, but there may be more to the story than just your router. Sometimes the problem is elsewhere in your network. For example, there are USB-C docking hubs that have a bug where the device will produce broadcast storms over ethernet (sufficient to completely kill the network) when the host system is powered down/sleeping or disconnected. It can take a while before someone locates that device as the culprit because it 'seems' like it must be the router (this is just an example, but a true one... there are other similar things I can share, too). So simplifying the network and working to isolate things by literally going back to basics is sometimes required. But yes, looking at logs may provide some clues, or it may not in some situations.

As you wish. However, in the process, it becomes harder for us to help. Essentially everybody on the planet who is using IPv4 behind a NAT masquerading router is (or should be) using the RFC1918 address range for their LANs. There is nothing secret about them, and it doesn't expose anything private about the network. The only value to anyone else is if they have a direct ethernet or wifi connection to your network, and even then, once they are on the network, they can ascertain at least the subnet to which they are connected without needing to see configs. I'm happy to share that my subnets are 10.0.1.0/24 (lan), 10.0.3.0/24 (guest), and 10.0.4.0/24 (iot), and then I have VPNs on 10.0.21.0/24, 10.0.22.0/24 and 10.0.23.0/24. Sharing that info won't compromise my network (and these are the real subnets I use).

1 Like

My network isn't complicated, I don't have any USB hubs. I suspect the router and wanted to find out how to investigate and troubleshoot it, so far without success. Redacting my subnet does not make it "harder for us to help."

With hindsight, it was perhaps foolish of me to expect answers by posting on the forum dedicated to the product.

Your response is surprisingly snarky... I was giving you legit advice. Maybe the bigger point here is that you probably haven't done enough investigating/testing/debugging to actually isolate the problem to the router itself. Can this be an issue with OpenWrt? Yes, of course. But can it be somewhere else in your network -- yes...very much so.

To give you an example of how we can all fall into certain traps like this: I personally had major network problems a few years ago. I did a bunch of troubleshooting, and everything seemed to suggest that it was my core switch (at the time, a 24-port managed gigabit PoE business grade device). Things were hanging, bandwidth was inconsistent, packets would just get dropped... and nothing had changed in terms of connected equipment and wiring on my network. I was arranging to RMA my switch when I unplugged one device and suddenly everything started working normally again. What was it?? Well, I had a Sonos Bridge (which is just basically a special AP that used to be necessary in some situations for Sonos systems) and I also had a Sonos Connect (which is one of the music players), both wired to the network. They had been like that for years without any problems. But... a Sonos update must have changed their STP algorithm and it suddenly was causing switching loops which caused all the issues I described. By removing the Bridge, all the problems cleared up. It makes perfect sense in hindsight, but I didn't see it until I stumbled into the solution.

So yeah, when I suggest that you do more troubleshooting and isolation, it's for a good reason.

EDIT: I also want to point out that your device is well supported and very popular with OpenWrt users. There are very few reports of issues related to OpenWrt performance on this device. This means that your situation is different -- it could be a configuration issue, a hardware issue with your C7, or a problem caused by something else on your network. You need to experiment to find out what it might be... I was trying to give you examples of how you need to think of your network as a system and work to identify and isolate the problem.

It might be worth pointing out that you are asking us for help... so we're asking for the things that are relevant and not actually sensitive/private information that could compromise the security of your network. In this very simple case, it's likely that this will not be the problem, but we cannot know for sure that you are using an RFC1918 address range.

2 Likes

Some of the logging was put there by developers and would require looking at and understanding the source code. When something goes wrong or when working on adding support for a new device a user can post the logs for developers to look at.

The OpenWRT developers do not write all the drivers or other code. It comes from the Linux kernel and other developers, with the changes getting merged into OpenWRT on a regular basis. OpenWRT does not duplicate change history from upstream and usually just merges a specific version with that as the commit message. There is no detailed list of what changed between versions - for that you would need to check upstream for each component. A few packages are managed as OpenWRT projects and have more detailed change logs.

There is no guide to sorting through everything as so much comes from upstream projects that change on a regular basis.

If the signal degrades over time and improves after a reboot then the most likely culprit is signal interference. The rebooting process means the router can search for a less congested frequency slot. You didn't tell me if you live in a condo or crowded area. If affirmative then I am confident that is your problem.

You should add a script to cron to reboot/wifi restart at 4am each morning so it picks up a new less congested frequency slot.

The channel is fixed, so the reboot wouldn't affect the channel selection. But certainly environmental conditions may be at play here.

While we're on the topic of the radio, though...

Channel 9 is not recommended. Typically 1, 6, and 11 (or possibly 12 or 13 in the UK) should be used. Channel 9 will likely experience a lot of noise from APs that are using 6 or 11 since it's nominally in the guard band between the two commonly used channels.

  1. Your wireless config contains references to the deprecated setting option hwmode => can you use option band instead? Also:
    Use option band '2g' (instead of option hwmode '11g')
    Use option band '5g' (instead of option hwmode '11a')
    see also https://openwrt.org/docs/guide-user/network/wifi/basic#common_options
    (not sure whether this will make a difference though)

  2. you are using a DFS channel (option channel '56')
    => possibly you are running into a DFS related issue - can you use a non-DFS channel instead like 48 ?

  3. For troubleshooting: possibly these commands will return different output at times when you are facing issues?

iwinfo
iw list
iw phy0 info (or iw wlan0 info)