Occasionally I lose default route with PPPoE upstream

I have an OpenWrt 21.02.1 router that connects to wan via PPPoE. Once in a while (not quite daily) Internet stops working and when I look on the router there is no default route. 'ifdown wan' followed by 'ifup wan' fixes this. The logs indicate that around the time I lost my default route there was a PPP timeout and reconnect. So it would seem that sometimes when this happens it doesn't correctly restore the default route.

It also seems to me that this started right after I set up IPv6, with WAN6 configured as a dhcpv6 client interface. I'm a bit hazy on how these interfaces interact so I'm not sure if there might be a connection here.

Any thoughts on what the problem might be or how to debug it better?

Work-arounds? Putting in a static default route doesn't work: since it's the same as the one supplied by PPP, the WAN interface going down removes it regardless.

Please run the following commands (copy-paste the whole block) and paste the output here, using the "Preformatted text </> " button:
grafik
Remember to redact passwords, MAC addresses and any public IP addresses you may have

ubus call system board; \
uci export network; \
uci export dhcp; uci export firewall; \
head -n -0 /etc/firewall.user; \
ip -4 addr ; ip -4 ro li tab all ; ip -4 ru; ifstatus wan

You could use a different metric to have them both, but it is more important to find the issue.

{
	"kernel": "5.4.154",
	"hostname": "router5",
	"system": "MediaTek MT7621 ver:1 eco:3",
	"model": "MikroTik RouterBOARD 750Gr3",
	"board_name": "mikrotik,routerboard-750gr3",
	"release": {
		"distribution": "OpenWrt",
		"version": "21.02.1",
		"revision": "r16325-88151b8303",
		"target": "ramips/mt7621",
		"description": "OpenWrt 21.02.1 r16325-88151b8303"
	}
}
package network

config interface 'loopback'
	option device 'lo'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'

config globals 'globals'
	option packet_steering '1'
	option ula_prefix 'fd06:633c:9029::/48'

config device
	option name 'br-lan'
	option type 'bridge'
	list ports 'lan2'
	list ports 'lan3'
	list ports 'lan4'
	list ports 'lan5'

config device
	option name 'lan2'
	option macaddr '48:8f:5a:df:d9:1a'

config device
	option name 'lan3'
	option macaddr '48:8f:5a:df:d9:1a'

config device
	option name 'lan4'
	option macaddr '48:8f:5a:df:d9:1a'

config device
	option name 'lan5'
	option macaddr '48:8f:5a:df:d9:1a'

config interface 'lan'
	option device 'br-lan'
	option proto 'static'
	list dns '172.23.0.1'
	list ipaddr '172.23.0.1/16'
	list ipaddr '172.23.0.15/16'
	option ip6assign '64'
	list ip6class 'wan_6'

config device
	option name 'wan'
	option macaddr '64:D1:54:43:11:E5'

config interface 'wan'
	option device 'wan'
	option proto 'pppoe'
	option password 'xxxxxxxxx'
	option username 'xxxxx'
	option ipv6 'auto'
	option peerdns '0'

config interface 'wan6'
	option device 'wan'
	option proto 'dhcpv6'
	option reqaddress 'try'
	option reqprefix '56'
	option peerdns '0'

package dhcp

config dnsmasq
	option domainneeded '1'
	option localise_queries '1'
	option rebind_protection '1'
	option rebind_localhost '1'
	option local '/lan/'
	option domain 'lan'
	option expandhosts '1'
	option authoritative '1'
	option readethers '1'
	option leasefile '/tmp/dhcp.leases'
	option localservice '1'
	option ednspacket_max '1232'
	option sequential_ip '1'
	option noresolv '1'
	option doh_backup_noresolv '-1'
	list doh_backup_server ''
	list server '127.0.0.1#5054'
	list server '127.0.0.1#5053'

config dhcp 'lan'
	option interface 'lan'
	option leasetime '12h'
	option dhcpv4 'server'
	option dhcpv6 'server'
	option ra 'server'
	list ra_flags 'managed-config'
	list ra_flags 'other-config'
	option start '1024'
	option limit '1024'
	option ra_slaac '0'

config dhcp 'wan'
	option interface 'wan'
	option ignore '1'
	list ra_flags 'none'

config odhcpd 'odhcpd'
	option maindhcp '0'
	option leasefile '/tmp/hosts/odhcpd'
	option leasetrigger '/usr/sbin/odhcpd-update'
	option loglevel '4'

package firewall

config defaults
	option syn_flood '1'
	option input 'ACCEPT'
	option output 'ACCEPT'
	option forward 'REJECT'

config zone
	option name 'lan'
	option input 'ACCEPT'
	option output 'ACCEPT'
	option forward 'ACCEPT'
	list network 'lan'

config zone
	option name 'wan'
	list network 'wan'
	list network 'wan6'
	option input 'REJECT'
	option output 'ACCEPT'
	option forward 'REJECT'
	option masq '1'
	option mtu_fix '1'

config forwarding
	option src 'lan'
	option dest 'wan'

config rule
	option name 'Allow-DHCP-Renew'
	option src 'wan'
	option proto 'udp'
	option dest_port '68'
	option target 'ACCEPT'
	option family 'ipv4'

config rule
	option name 'Allow-Ping'
	option src 'wan'
	option proto 'icmp'
	option icmp_type 'echo-request'
	option family 'ipv4'
	option target 'ACCEPT'

config rule
	option name 'Allow-IGMP'
	option src 'wan'
	option proto 'igmp'
	option family 'ipv4'
	option target 'ACCEPT'

config rule
	option name 'Allow-DHCPv6'
	option src 'wan'
	option proto 'udp'
	option src_ip 'fc00::/6'
	option dest_ip 'fc00::/6'
	option dest_port '546'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-MLD'
	option src 'wan'
	option proto 'icmp'
	option src_ip 'fe80::/10'
	list icmp_type '130/0'
	list icmp_type '131/0'
	list icmp_type '132/0'
	list icmp_type '143/0'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-ICMPv6-Input'
	option src 'wan'
	option proto 'icmp'
	list icmp_type 'echo-request'
	list icmp_type 'echo-reply'
	list icmp_type 'destination-unreachable'
	list icmp_type 'packet-too-big'
	list icmp_type 'time-exceeded'
	list icmp_type 'bad-header'
	list icmp_type 'unknown-header-type'
	list icmp_type 'router-solicitation'
	list icmp_type 'neighbour-solicitation'
	list icmp_type 'router-advertisement'
	list icmp_type 'neighbour-advertisement'
	option limit '1000/sec'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-ICMPv6-Forward'
	option src 'wan'
	option dest '*'
	option proto 'icmp'
	list icmp_type 'echo-request'
	list icmp_type 'echo-reply'
	list icmp_type 'destination-unreachable'
	list icmp_type 'packet-too-big'
	list icmp_type 'time-exceeded'
	list icmp_type 'bad-header'
	list icmp_type 'unknown-header-type'
	option limit '1000/sec'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-IPSec-ESP'
	option src 'wan'
	option dest 'lan'
	option proto 'esp'
	option target 'ACCEPT'

config rule
	option name 'Allow-ISAKMP'
	option src 'wan'
	option dest 'lan'
	option dest_port '500'
	option proto 'udp'
	option target 'ACCEPT'

config rule
	option name 'Support-UDP-Traceroute'
	option src 'wan'
	option dest_port '33434:33689'
	option proto 'udp'
	option family 'ipv4'
	option target 'REJECT'
	option enabled '0'

config include
	option path '/etc/firewall.user'

# This file is interpreted as shell script.
# Put your custom iptables rules here, they will
# be executed with each firewall (re-)start.

# Internal uci firewall chains are flushed and recreated on reload, so
# put custom rules into the root chains e.g. INPUT or FORWARD or into the
# special user chains, e.g. input_wan_rule or postrouting_lan_rule.
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN qlen 1000
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
8: br-lan: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP qlen 1000
    inet 172.23.0.1/16 brd 172.23.255.255 scope global br-lan
       valid_lft forever preferred_lft forever
    inet 172.23.0.15/16 brd 172.23.255.255 scope global secondary br-lan
       valid_lft forever preferred_lft forever
43: pppoe-wan: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1480 qdisc fq_codel state UNKNOWN qlen 3
    inet x.x.x.x peer 100.65.0.1/32 scope global pppoe-wan
       valid_lft forever preferred_lft forever
default via 100.65.0.1 dev pppoe-wan 
100.65.0.1 dev pppoe-wan scope link  src 5.180.148.57 
172.23.0.0/16 dev br-lan scope link  src 172.23.0.1 
local x.x.x.x dev pppoe-wan table local scope host  src 5.180.148.57 
broadcast 127.0.0.0 dev lo table local scope link  src 127.0.0.1 
local 127.0.0.0/8 dev lo table local scope host  src 127.0.0.1 
local 127.0.0.1 dev lo table local scope host  src 127.0.0.1 
broadcast 127.255.255.255 dev lo table local scope link  src 127.0.0.1 
broadcast 172.23.0.0 dev br-lan table local scope link  src 172.23.0.1 
local 172.23.0.1 dev br-lan table local scope host  src 172.23.0.1 
local 172.23.0.15 dev br-lan table local scope host  src 172.23.0.1 
broadcast 172.23.255.255 dev br-lan table local scope link  src 172.23.0.1 
0:	from all lookup local 
32766:	from all lookup main 
32767:	from all lookup default 
{
	"up": true,
	"pending": false,
	"available": true,
	"autostart": true,
	"dynamic": false,
	"uptime": 27004,
	"l3_device": "pppoe-wan",
	"proto": "pppoe",
	"device": "wan",
	"updated": [
		"addresses",
		"routes"
	],
	"metric": 0,
	"dns_metric": 0,
	"delegation": true,
	"ipv4-address": [
		{
			"address": "x.x.x.x",
			"mask": 32,
			"ptpaddress": "100.65.0.1"
		}
	],
	"ipv6-address": [
		{
			"address": "fe80::fd6b:de46:110d:3931",
			"mask": 128
		}
	],
	"ipv6-prefix": [
		
	],
	"ipv6-prefix-assignment": [
		
	],
	"route": [
		{
			"target": "0.0.0.0",
			"mask": 0,
			"nexthop": "100.65.0.1",
			"source": "0.0.0.0/0"
		}
	],
	"dns-server": [
		
	],
	"dns-search": [
		
	],
	"neighbors": [
		
	],
	"inactive": {
		"ipv4-address": [
			
		],
		"ipv6-address": [
			
		],
		"route": [
			{
				"target": "0.0.0.0",
				"mask": 0,
				"nexthop": "100.65.0.1",
				"source": "0.0.0.0/0"
			}
		],
		"dns-server": [
		],
		"dns-search": [
			
		],
		"neighbors": [
			
		]
	},
	"data": {
		
	}
}


Any reason why you are not using the gateway provided by the PPPoE server?

The default route is tied to the WAN interface, but OpenWrt will give a different name to the physical interface. Could this be the issue?

No, I am actually using the route from PPPoE... the static route in the output is just something I put there to test whether I could use it as a work-around, and I forgot to remove it before generating this output.

There are a few mistakes, but nothing that could affect the wan gateway.
If you don't actually need the secondary IP address in the lan interface, remove it.
You have the ipv6 option 'auto' in the wan interface, which means the wan_6 interface is automatically spawned. But you also have the wan6 interface configured, and the wan firewall zone contains wan6 but not wan_6. Remove wan6 and add wan_6 to the wan firewall zone.
You can also run ifstatus wan when the problem occurs again and paste the output here.

The secondary IP in the LAN interface is deliberate; I have a bunch of routers and APs on my net, and the more important ones have spares that I can swap in at a moment's notice or even instruct a helper to swap in when I'm off-site, so these devices have a "device IP" (here 0.15) and a "role IP" (here 0.1).

The bit about wan6 and wan_6 is interesting, as I really didn't understand very well why there were two of them; OpenWrt comes with wan6 out of the box, right? I'd happily remove it, but I can't seem to find any way to "add wan_6 to the wan firewall zone" in LuCI because, as it's dynamically created, clicking the "Edit" button doesn't do anything. I thought it was the fact that I had wan6 configured as a "dhcpv6 client" that dynamically created the wan_6 interface, but if I understand you correctly it's the "Obtain IPv6 address: Automatic" option on the wan interface that's doing that, right?

You can edit firewall wan zone and add it there.

right

The other thing you could do is set the ipv6 option to manual in the wan interface. Then wan_6 will not be spawned anymore and you can configure wan6 as you wish. Don't forget to fix the ip6class in the lan interface.

OK, so I deleted wan6 and that didn't seem to have any adverse effects, but I cannot add wan_6 to the wan zone no matter what. On the firewall config page, when I go to the wan zone, it seems to give me the option to do so, but the line with wan_6 says "(no interfaces attached)". Although I can click the checkbox and then do "save", it doesn't stick: if I go back there, it's just interface "wan" in the wan zone again. On the interfaces page wan_6 remains grey rather than red.

Can you test it with ip6tables-save -c -t filter | grep wan ?

Can we see the logs from when this issue happens and the router reconnects?

To see if there's IPv6 traffic in the wan zone? Yes, there is.

1 Like

I'll send them when it next happens; if it doesn't happen by Saturday, I'll play around with triggering it by briefly unplugging the uplink cable or something.

Okay, then it should be included and you don't need to take further action.

Understood, but it could be considered a UI bug that it appears grey rather than red in the list of interfaces.

Yes, it seems so.