PBR stops working when anything happens to wan

I have PBR set up on my router to redirect all non-local traffic from one particular client (TV) via a vpn, while everything else is directed to wan as normal.

Specifically, the PBR rules are as follows:

  • VPN-Test: Route all traffic via the vpn. (For testing purposes. Disabled.)
  • Local: Route any traffic from TV to the local network via the default route
  • TV: Route any traffic from TV via the vpn
  • VpnConfig: Route any traffic destined for the network on the other side of the vpn via the vpn
  • Default: Route all traffic via wan

So far so good, and indeed it works perfectly.

Until anything happens to wan. Then everything grinds to a halt.

Specifically, by “anything happens to wan” I mean connectivity drops, the cable gets unplugged for a moment, the modem reboots, even just renewing the DHCP lease is enough to trigger the symptoms: the router decides it doesn’t want to do any routing anymore, and completely disconnects the lan from the outside world.

Devices on lan can access the router no problem. The router can access the internet and the vpn no problem. Devices on lan find that the internet and vpn are both unreachable.

Then I log in to the router, manually restart the PBR service, and everything goes back to normal. Until the next day, when the DHCP lease gets renewed and the cycle repeats.

I’m at a loss. I’ve spent several hours staring at routing tables, but my relatively untrained eyes can’t see anything amiss. And I’m already feeling pretty out of my depth, so any help would be greatly appreciated.

Configuration info below the cut

Configuration
$ ubus call system board:
{
	"kernel": "6.6.93",
	"hostname": "[REDACTED]",
	"system": "ARMv8 Processor rev 4",
	"model": "Linksys E8450 (UBI)",
	"board_name": "linksys,e8450-ubi",
	"rootfs_type": "squashfs",
	"release": {
		"distribution": "OpenWrt",
		"version": "24.10.2",
		"revision": "r28739-d9340319c6",
		"target": "mediatek/mt7622",
		"description": "OpenWrt 24.10.2 r28739-d9340319c6",
		"builddate": "1750711236"
	}
}


$ cat /etc/config/network:

config interface 'loopback'
	option device 'lo'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'

config globals 'globals'
	option ula_prefix 'fd21:af3b:4428::/48'

config device
	option name 'br-lan'
	option type 'bridge'
	list ports 'lan1'
	list ports 'lan2'
	list ports 'lan3'
	list ports 'lan4'

config interface 'lan'
	option device 'br-lan'
	option proto 'static'
	option ip6assign '60'
	option delegate '0'
	list ipaddr '192.168.0.1/24'
	list dns '192.168.0.2'

config interface 'wan'
	option device 'wan'
	option proto 'dhcp'

config interface 'wan6'
	option device 'wan'
	option proto 'dhcpv6'

config interface 'vpn'
	option proto 'none'
	option device 'tun0'



$ cat /etc/config/firewall:

config defaults
	option input 'REJECT'
	option output 'ACCEPT'
	option forward 'REJECT'
	option synflood_protect '1'
	option flow_offloading '1'
	option flow_offloading_hw '1'

config zone
	option name 'lan'
	option input 'ACCEPT'
	option output 'ACCEPT'
	option forward 'ACCEPT'
	list network 'lan'

config zone
	option name 'wan'
	option input 'REJECT'
	option output 'ACCEPT'
	option forward 'REJECT'
	option masq '1'
	option mtu_fix '1'
	list network 'wan'
	list network 'wan6'

config rule
	option name 'Allow-DHCP-Renew'
	option src 'wan'
	option proto 'udp'
	option dest_port '68'
	option target 'ACCEPT'
	option family 'ipv4'

config rule
	option name 'Allow-Ping'
	option src 'wan'
	option proto 'icmp'
	option icmp_type 'echo-request'
	option family 'ipv4'
	option target 'ACCEPT'

config rule
	option name 'Allow-IGMP'
	option src 'wan'
	option proto 'igmp'
	option family 'ipv4'
	option target 'ACCEPT'

config rule
	option name 'Allow-DHCPv6'
	option src 'wan'
	option proto 'udp'
	option dest_port '546'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-MLD'
	option src 'wan'
	option proto 'icmp'
	option src_ip 'fe80::/10'
	list icmp_type '130/0'
	list icmp_type '131/0'
	list icmp_type '132/0'
	list icmp_type '143/0'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-ICMPv6-Input'
	option src 'wan'
	option proto 'icmp'
	list icmp_type 'echo-request'
	list icmp_type 'echo-reply'
	list icmp_type 'destination-unreachable'
	list icmp_type 'packet-too-big'
	list icmp_type 'time-exceeded'
	list icmp_type 'bad-header'
	list icmp_type 'unknown-header-type'
	list icmp_type 'router-solicitation'
	list icmp_type 'neighbour-solicitation'
	list icmp_type 'router-advertisement'
	list icmp_type 'neighbour-advertisement'
	option limit '1000/sec'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-ICMPv6-Forward'
	option src 'wan'
	option dest '*'
	option proto 'icmp'
	list icmp_type 'echo-request'
	list icmp_type 'echo-reply'
	list icmp_type 'destination-unreachable'
	list icmp_type 'packet-too-big'
	list icmp_type 'time-exceeded'
	list icmp_type 'bad-header'
	list icmp_type 'unknown-header-type'
	option limit '1000/sec'
	option family 'ipv6'
	option target 'ACCEPT'

config rule
	option name 'Allow-IPSec-ESP'
	option src 'wan'
	option dest 'lan'
	option proto 'esp'
	option target 'ACCEPT'

config rule
	option name 'Allow-ISAKMP'
	option src 'wan'
	option dest 'lan'
	option dest_port '500'
	option proto 'udp'
	option target 'ACCEPT'

[PORT FORWARDING RULES REDACTED]


config forwarding
	option src 'lan'
	option dest 'wan'

config zone
	option name 'Vpn'
	option input 'REJECT'
	option output 'ACCEPT'
	option forward 'REJECT'
	option masq '1'
	option mtu_fix '1'
	list network 'vpn'

config forwarding
	option src 'lan'
	option dest 'Vpn'


$ cat /etc/config/pbr:

config pbr 'config'
	option enabled '1'
	option verbosity '2'
	option strict_enforcement '1'
	option resolver_set 'none'
	list resolver_instance '*'
	option ipv6_enabled '0'
	list ignored_interface 'vpnserver'
	option boot_timeout '30'
	option rule_create_option 'add'
	option procd_boot_trigger_delay '5000'
	option procd_reload_delay '1'
	option webui_show_ignore_target '0'
	option nft_rule_counter '0'
	option nft_set_auto_merge '1'
	option nft_set_counter '0'
	option nft_set_flags_interval '1'
	option nft_set_flags_timeout '0'
	option nft_set_policy 'performance'
	list webui_supported_protocol 'all'
	list webui_supported_protocol 'tcp'
	list webui_supported_protocol 'udp'
	list webui_supported_protocol 'tcp udp'
	list webui_supported_protocol 'icmp'

config include
	option path '/usr/share/pbr/pbr.user.dnsprefetch'
	option enabled '0'

config include
	option path '/usr/share/pbr/pbr.user.aws'
	option enabled '0'

config include
	option path '/usr/share/pbr/pbr.user.netflix'
	option enabled '0'

config dns_policy
	option name 'Redirect Local IP DNS'
	option src_addr '192.168.1.5'
	option dest_dns '1.1.1.1'
	option enabled '0'

config policy
	option name 'VPN-Test'
	option src_addr '0.0.0.0/0'
	option dest_addr '0.0.0.0/0'
	option interface 'vpn'
	option enabled '0'

config policy
	option name 'Local'
	option src_addr 'TV'
	option dest_addr '192.168.0.0/24'
	option interface 'wan'

config policy
	option name 'TV'
	option src_addr 'TV'
	option interface 'vpn'

config policy
	option name 'VpnConfig'
	option src_addr '0.0.0.0/0'
	option dest_addr '192.168.1.0/24'
	option interface 'vpn'

config policy
	option name 'Default'
	option interface 'wan'
	option src_addr '0.0.0.0/0'
	option dest_addr '0.0.0.0/0'



$ service pbr status:

pbr - environment
pbr 1.2.0-r2 installed on OpenWrt 24.10.2.

Dnsmasq version 2.90  Copyright (c) 2000-2024 Simon Kelley
Compile time options: IPv6 GNU-getopt no-DBus UBus no-i18n no-IDN DHCP no-DHCPv6 no-Lua TFTP no-conntrack no-ipset no-nftset no-auth no-cryptohash no-DNSSEC no-ID loop-detect inotify dumpfile

pbr fw4 nft file: /usr/share/nftables.d/ruleset-post/30-pbr.nft
add chain inet fw4 pbr_mark_0x010000
add rule inet fw4 pbr_mark_0x010000  mark set mark and 0xff00ffff xor 0x010000
add rule inet fw4 pbr_mark_0x010000 return
add chain inet fw4 pbr_mark_0x020000
add rule inet fw4 pbr_mark_0x020000  mark set mark and 0xff00ffff xor 0x020000
add rule inet fw4 pbr_mark_0x020000 return
add rule inet fw4 pbr_prerouting ip saddr { 192.168.0.152 } ip daddr { 192.168.0.0/24 }  goto pbr_mark_0x010000 comment "Local"
add rule inet fw4 pbr_prerouting ip saddr { 192.168.0.152 }  goto pbr_mark_0x020000 comment "TV"
add rule inet fw4 pbr_prerouting ip saddr { 0.0.0.0/0 } ip daddr { 192.168.1.0/24 }  goto pbr_mark_0x020000 comment "VpnConfig"
add rule inet fw4 pbr_prerouting ip saddr { 0.0.0.0/0 } ip daddr { 0.0.0.0/0 }  goto pbr_mark_0x010000 comment "Default"

pbr chains - policies
	chain pbr_forward { # handle 40
	}
	chain pbr_input { # handle 41
	}
	chain pbr_output { # handle 42
	}
	chain pbr_postrouting { # handle 44
	}
	chain pbr_prerouting { # handle 43
		ip saddr 192.168.0.152 ip daddr 192.168.0.0/24 goto pbr_mark_0x010000 comment "Local" # handle 34200
		ip saddr 192.168.0.152 goto pbr_mark_0x020000 comment "TV" # handle 34201
		ip saddr 0.0.0.0/0 ip daddr 192.168.1.0/24 goto pbr_mark_0x020000 comment "VpnConfig" # handle 34202
		ip saddr 0.0.0.0/0 ip daddr 0.0.0.0/0 goto pbr_mark_0x010000 comment "Default" # handle 34203
	}
	chain pbr_dstnat { # handle 39
	}

pbr chains - marking
	chain pbr_mark_0x010000 { # handle 33964
		meta mark set meta mark & 0xff01ffff | 0x00010000 # handle 34196
		return # handle 34197
	}
	chain pbr_mark_0x020000 { # handle 33967
		meta mark set meta mark & 0xff02ffff | 0x00020000 # handle 34198
		return # handle 34199
	}

pbr nft sets

pbr tables & routing
IPv4 table 256 pbr_wan route:
default via [WAN GATEWAY REDACTED] dev wan 
IPv4 table 256 pbr_wan rule(s):
30000:	from all fwmark 0x10000/0xff0000 lookup pbr_wan

IPv4 table 257 pbr_vpn route:
default via 10.8.0.10 dev tun0 
IPv4 table 257 pbr_vpn rule(s):
29998:	from all fwmark 0x20000/0xff0000 lookup pbr_vpn



$ ip -4 route show table all:
default via [WAN GATEWAY REDACTED] dev wan table pbr_wan 
192.168.0.0/24 dev br-lan table pbr_wan proto kernel scope link src 192.168.0.1 
default via 10.8.0.10 dev tun0 table pbr_vpn 
192.168.0.0/24 dev br-lan table pbr_vpn proto kernel scope link src 192.168.0.1 
default via [WAN GATEWAY REDACTED] dev wan proto static src [WAN IP REDACTED] 
10.8.0.9 dev tun0 proto kernel scope link src 10.8.0.10 
[WAN GATEWAY REDACTED]/22 dev wan proto kernel scope link src [WAN IP REDACTED] 
192.168.0.0/24 dev br-lan proto kernel scope link src 192.168.0.1 
local 10.8.0.10 dev tun0 table local proto kernel scope host src 10.8.0.10 
local 127.0.0.0/8 dev lo table local proto kernel scope host src 127.0.0.1 
local 127.0.0.1 dev lo table local proto kernel scope host src 127.0.0.1 
broadcast 127.255.255.255 dev lo table local proto kernel scope link src 127.0.0.1 
local [WAN IP REDACTED] dev wan table local proto kernel scope host src [WAN IP REDACTED]
broadcast [WAN BCAST REDACTED] dev wan table local proto kernel scope link src [WAN IP REDACTED]
local 192.168.0.1 dev br-lan table local proto kernel scope host src 192.168.0.1
broadcast 192.168.0.255 dev br-lan table local proto kernel scope link src 192.168.0.1


$ ip -4 rule list
0:	from all lookup local
29998:	from all fwmark 0x20000/0xff0000 lookup pbr_vpn
30000:	from all fwmark 0x10000/0xff0000 lookup pbr_wan
32766:	from all lookup main
32767:	from all lookup default

Please refrain from using 0.0.0.0/0 as src or dest.

Instead, set src_addr to the address range of the whole interface (e.g. 192.168.1.0/24) or to only a few clients.

The issue I see is that you create two routes, one of which covers the entire router. That one takes priority over the other: in LuCI, a rule higher in the list has priority over the ones below it. The way you have defined them does not make much sense, though; it will result in either wan only or no connection at all.

By default, traffic that is not assigned an interface in pbr goes out over wan, so you don't need to explicitly route things to wan. You do want to, however, in a case like this example:

I have a network called pcnet, and its subnet 10.34.79.0/24 is routed over the vpn.

Basically, this creates the following in pbr:

config policy
   option name 'route-pcnet-to-vpn'
   option src_addr ' 10.34.79.0/24'
   option interface 'wgclient'

But there is one device I want to route over wan, which generates a config like this:

config policy
   option name 'route-single-pc-wan'
   option src_addr '10.34.79.3/32'
   option interface 'wan'

config policy
   option name 'route-pcnet-to-vpn'
   option src_addr ' 10.34.79.0/24'
   option interface 'wgclient'

If you look at this in LuCI, the wan policy I made has a higher priority than the one below it. In your situation, the rule overrides the route for the same source, or maybe even more, since 0.0.0.0/0 means the full internet.

It is possible, though, that routing to wan by default is not PBR's default behaviour, or that it is absent when forwarding is only allowed to a non-wan zone and the wan zone is not forwarded to in the firewall settings (as I think it should be). In that case you can add it yourself by creating a firewall rule under the traffic rules:

config rule
        option name 'Allow-bypass-vpn-mark'
        option src '*'
        option dest 'wan'
        option target 'ACCEPT'
        option mark '0x10000/0xff0000'

The firewall mark is taken from the output of ip rule. I have used this rule for a long time and it has never failed me :slight_smile:

2 Likes

In addition, and to underline what @xize already remarked: you can delete the above rule, as your default rule is already via the wan.

2 Likes

Alright, I’ve removed any references to 0.0.0.0/0, and I’ve removed the ‘Default’ policy from PBR.

Just for my own learning, how would that have been causing the symptoms I described? I’m still pretty new to the nitty gritty of how routing works, so the connection here is not obvious to me.

If this was enabled, you would be routing the full internet to the internet.

But because you are using masquerading (which is not a bad setting; it is actually required for NAT), you route the full internet to your gateway IP. Basically, it will never reach the internet; 0.0.0.0 will just be the router.

This would mean that the full internet (or, because it is masqueraded, the router itself) is routed to the 192 IP.

Normally every flow goes from src to dest, and firewalls work the same way: if you send a packet to a destination, the other side is allowed to respond on the same connection and the firewall gives the green light.

If the other side initiates first, it will be blocked.

For a route to actually be correct, you need at least a source, which means you want to route the address range of an interface, e.g. 192.168.1.0/24. It can be left empty, but I advise against it: it will make pbr slower and more prone to misconfiguration. The destination address is less important here, and the interface selection is meant as the outgoing interface; there is no need to specify the gateway in the destination field :slight_smile:

The destination field is for when you want the policy to apply only to that destination: if you want to use the vpn only for google.com, you put google.com in the destination field. Local IPs don't make much sense there, as they aren't routed on the vpn interface.

The source works in pretty much the same way: if you want to restrict the policy to the source addresses of an interface, you add 192.168.1.0/24.

So at least one of the two is required; it is best to do it on the source.

Edit

Also, a few other things that are handy to know (tl;dr):

If you plan to create more interfaces with different networks, such as VLANs, make sure that in the interface creation wizard, under the advanced tab, the default gateway option is unchecked.

If it is checked, the interface will act as a wan interface and hand the router a default route, which can mess up the outgoing interfaces in PBR.

With VPNs like WireGuard, make sure the VPN doesn't do any routing by itself: 'allow routing' must be disabled on the peers, so that PBR becomes your central place for routing :slight_smile:. For WireGuard servers it is different: you want the server to do the routing, and you ignore it in PBR.

1 Like

Very nice write up, thanks @xize

Small addition: when IPv6 is also enabled, use the interface (device) with @ in front as the source, e.g. @br-guest, or, if your source is a single client, use the MAC address,
because these will take care of both IPv4 and IPv6.

2 Likes

Ok, I think I’m starting to understand a bit, but let me just make sure I’ve got it right.

Here are the impressions I had that, it seems, you’re saying are incorrect:

I thought that 0.0.0.0/0 would match any ip, but it actually only matches the router.

I thought PBR policies only apply to traffic coming to the router from lan, but they actually apply to any traffic the router receives.

Follow-up question: How is it that my original policies were doing exactly what I wanted (other than the wan reset issue) given that, according to my understanding of this thread, none of these policies make any sense? And how would renewing the DHCP lease on wan be the thing that breaks it?

The short answer is that PBR basically replaces most of the routing; whatever is left empty follows the defaults.

PBR detects gateways and usually uses the wan interface as the default. It also takes the 'use default gateway' option into account, which you find in the advanced tab when you create or edit an interface. This should usually be enabled only on wan or a wan-type interface; lan is probably an exception, but other interfaces need it unchecked.

So by default, anything not set in pbr is routed to wan. But if you have made a vpn interface, it is recommended to let PBR take care of the routes, and it is then advised not to use any default route on that interface.

To match any ip inside PBR you leave the field empty :slight_smile:

As for:

This seems to stem from a misconfiguration. Usually it won't break, but PBR starts re-routing, which enables the kill switch for a short time.

But if it is a misconfiguration, especially with the 'use default gateway' checkbox on other interfaces (you can see it on the advanced settings tab when editing them), then PBR might choose the wrong interface as wan; basically, you will have a wan conflict.

0.0.0.0 just means the masqueraded gateway/router IP on that interface. In other words, in a misconfiguration it starts pointing to a non-wan interface because pbr thinks that is the default gateway. In LuCI you can see which interface PBR has check-marked as the gateway once it is done routing (you can verify this in the logs too).

And without policies... two wans use metrics, but both have a metric of 0 and one of the two was not intended as a wan, so after a restart the other one takes over. This checkbox is easily checked when creating an interface, but it really needs to be unchecked :slight_smile: