B.A.T.M.A.N. 802.11s mesh unusable - duplicates and throughput

Setup:

WAN <-> OPNSense - VLAN tags to Openwrt (22.03) - Multiple Wifi networks based on VLANs - Multiple access points wanting to join mesh and distribute Wifi based on the VLANs

  • Testing with iperf3 and speedtestcc packages.
  • 802.11s mesh works fine on its own for primary network.

Problem:

  • When I introduce BATMAN into the environment, throughput drops from 300 Mbit/s to 6 kbit/s, with packet loss, failures, and duplicated pings.

Relevant Contents of /etc/config/network on device with Ethernet cable containing VLANs

config device
	option name 'br-lan'
	option type 'bridge'
	option ipv6 '0'
	list ports 'bat0'
	list ports 'lan1'
	list ports 'lan2'
	list ports 'lan4'

config interface 'lan'
	option proto 'static'
	option netmask '255.255.255.0'
	option gateway '192.168.23.1'
	option ipaddr '192.168.23.4'
	option device 'br-bat.1'
	list dns '192.168.23.1'
	option delegate '0'

config bridge-vlan
	option device 'br-lan'
	option vlan '1'
	list ports 'bat0'
	list ports 'lan1:u*'
	list ports 'lan2'
	list ports 'lan4'

config bridge-vlan
	option device 'br-lan'
	option vlan '666'
	list ports 'bat0:t'
	list ports 'lan1:t'

config interface 'IOT'
	option device 'br-bat.666'
	option proto 'dhcp'

config interface 'bat0'
	option proto 'batadv'
	option hop_penalty '30'
	option isolation_mark '0x00000000/0x00000000'
	option log_level '0'
	option multicast_fanout '16'
	option orig_interval '1000'
	option gw_mode 'off'
	option delegate '0'
	option routing_algo 'BATMAN_IV'
	option distributed_arp_table '0'
	option fragmentation '0'
	option multicast_mode '0'
	option network_coding '0'

config device
	option name 'bat0'
	option ipv6 '0'

config device
	option type '8021q'
	option ifname 'bat0'
	option vid '666'
	option name 'bat0.666'
	option ipv6 '0'

config device
	option type 'bridge'
	option name 'br-bat.666'
	list ports 'bat0.666'
	list ports 'br-lan.666'
	option ipv6 '0'

config device
	option type '8021q'
	option ifname 'bat0'
	option vid '1'
	option name 'bat0.1'
	option ipv6 '0'

config device
	option type 'bridge'
	option name 'br-bat.1'
	list ports 'bat0.1'
	list ports 'br-lan.1'
	option ipv6 '0'

config interface 'MESH'
	option proto 'batadv_hardif'
	option master 'bat0'
	option delegate '0'
	option throughput_override '0'

config device
	option name 'wlan1'
	option mtu '1532'
	option ipv6 '0'

Contents of /etc/config/wireless (for this example, the device is currently being used only for the mesh)

config wifi-device 'radio1'
	option type 'mac80211'
	option path '1e140000.pcie/pci0000:00/0000:00:01.0/0000:02:00.0'
	option band '5g'
	option cell_density '0'
	option country 'CA'
	option htmode 'VHT80'
	option channel '100'

config wifi-iface 'wifinet0'
	option device 'radio1'
	option mode 'mesh'
	option encryption 'sae'
	option mesh_id 'MESH'
	option mesh_fwding '0'
	option mesh_rssi_threshold '0'
	option key 'abcdef123'
	option network 'MESH'

I'm able to create even a 2 node mesh with Batman and request DHCP addresses from the second mesh router interface, and also with a wireless network on 2nd node from laptop.

Speedtest results (Directly connected Ethernet)

Directly connected Ethernet (Node 1)

Testing download speed (32) .................................................................................................................................................
Download: 363.12 Mbit/s
Testing upload speed (12) ....................................
Upload: 17.08 Mbit/s

Mesh node without Batman connected via wireless (4 nodes)

Testing download speed (32) .............................................................................................................................................................................................................
Download: 240.13 Mbit/s
Testing upload speed (8) .................................
Upload: 17.04 Mbit/s

Mesh node - B.A.T.M.A.N. connected via wireless (Node 2)

Finding fastest server... Unable to download server list. Try again later

Iperf 3 from Node to Node 802.11s no batman (same Subnet)

[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec   177 MBytes   149 Mbits/sec    0             sender
[  5]   0.00-10.00  sec   176 MBytes   147 Mbits/sec                  receiver

Iperf3 from 802.11s node to internet iperf3 host

[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec  17.7 MBytes  14.9 Mbits/sec   14             sender
[  5]   0.00-10.10  sec  17.3 MBytes  14.4 Mbits/sec                  receiver

Iperf3 from Node to Node Batman (Same Subnet)

[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.02  sec   239 MBytes   200 Mbits/sec    0             sender
[  5]   0.00-10.02  sec   239 MBytes   200 Mbits/sec                  receiver

Iperf3 from Batman Node to internet iperf3 host

[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec  42.4 KBytes  34.7 Kbits/sec   13             sender
[  5]   0.00-10.09  sec  5.66 KBytes  4.59 Kbits/sec                  receiver

2nd node (and all other nodes if they existed) /etc/config/network


config interface 'bat0'
	option proto 'batadv'
	option routing_algo 'BATMAN_IV'
	option distributed_arp_table '0'
	option fragmentation '0'
	option gw_mode 'off'
	option hop_penalty '30'
	option multicast_mode '0'
	option network_coding '0'

config interface 'MESH'
	option proto 'batadv_hardif'
	option master 'bat0'
	option delegate '0'

config device
	option type '8021q'
	option ifname 'bat0'
	option vid '666'
	option name 'bat0.666'
	option ipv6 '0'

config device
	option name 'wlan1'
	option mtu '1532'
	option ipv6 '0'

config interface 'IOT'
	option proto 'dhcp'
	option device 'bat0.666'

2nd node (and all other nodes if they existed) /etc/config/wireless

config wifi-device 'radio1'
	option type 'mac80211'
	option path 'platform/soc/a800000.wifi'
	option band '5g'
	option htmode 'VHT80'
	option channel '100'
	option country 'CA'
	option cell_density '0'

config wifi-iface 'default_radio1'
	option device 'radio1'
	option mode 'mesh'
	option mesh_id 'MESH'
	option mesh_fwding '0'
	option mesh_rssi_threshold '0'
	option encryption 'sae'
	option key 'mishmash'
	option network 'MESH'

config wifi-iface 'wifinet1'
	option device 'radio1'
	option mode 'ap'
	option ssid 'IOT'
	option encryption 'none'
	option network 'IOT'

Ping results

Finally, when I ping:

802.11 Mesh node ping no batman

root@mesh3:~# ping google.ca
PING google.ca (142.250.69.195): 56 data bytes
64 bytes from 142.250.69.195: seq=0 ttl=60 time=26.436 ms
64 bytes from 142.250.69.195: seq=1 ttl=60 time=26.120 ms
64 bytes from 142.250.69.195: seq=2 ttl=60 time=30.295 ms
64 bytes from 142.250.69.195: seq=3 ttl=60 time=20.534 ms
^C
--- google.ca ping statistics ---
4 packets transmitted, 4 packets received, 0% packet loss
round-trip min/avg/max = 20.534/25.846/30.295 ms

802.11 Mesh node with BATMAN

root@mesh15:~# ping google.ca
ping: bad address 'google.ca'
root@mesh15:~# ping google.ca
PING google.ca (142.250.69.195): 56 data bytes
64 bytes from 142.250.69.195: seq=0 ttl=60 time=25.734 ms
64 bytes from 142.250.69.195: seq=0 ttl=60 time=25.842 ms (DUP!)
64 bytes from 142.250.69.195: seq=1 ttl=60 time=26.875 ms
64 bytes from 142.250.69.195: seq=1 ttl=60 time=27.088 ms (DUP!)
64 bytes from 142.250.69.195: seq=2 ttl=60 time=22.788 ms
64 bytes from 142.250.69.195: seq=2 ttl=60 time=23.004 ms (DUP!)
64 bytes from 142.250.69.195: seq=3 ttl=60 time=20.689 ms
64 bytes from 142.250.69.195: seq=3 ttl=60 time=20.903 ms (DUP!)
64 bytes from 142.250.69.195: seq=4 ttl=60 time=22.631 ms
64 bytes from 142.250.69.195: seq=4 ttl=60 time=23.430 ms (DUP!)
64 bytes from 142.250.69.195: seq=5 ttl=60 time=20.993 ms
64 bytes from 142.250.69.195: seq=5 ttl=60 time=22.186 ms (DUP!)
^C
--- google.ca ping statistics ---
6 packets transmitted, 6 packets received, 6 duplicates, 0% packet loss
round-trip min/avg/max = 20.689/23.513/27.088 ms

Question -

What the heck am I doing wrong? I believe I have a fairly simple setup here wanting to run a VLAN over Batman but it seems that there are duplicate packets, routing errors and I just don't know where to look at next. Would I be able to ask the community to see if they see anything wrong with my configuration above?

Do I read your config correctly that you bridge a bridge?

You only need the batman hardif and your VLAN bridges, and then you attach the tagged batman interface to each VLAN bridge.

yes, yes I might! I'm a bit frazzled here - so bear with me, and thanks for replying regarding my mess.

  • My LAN bridge br-lan (connected to OPNSense, to my DSA Router) is tagging VLAN 666 on bat0 and lan1. VLAN ID 1 is set to untagged.
  • This automatically created a br-lan.1 and br-lan.666 VLAN in devices (Base Device br-lan).
  • I then created bat0.1 and bat0.666 VLANs as children of base device bat0
  • Then added bat0.1 to my br-lan bridge, along with lan1, lan2 and lan4
  • I then created br-bat.666 bridge with the vlan ports bat0.666 and br-lan.666
  • I then created a br-bat.1 bridge with vlan ports bat0.1 and br-lan.1
  • I set my interfaces LAN to use bridge br-bat.1
  • I set my interface IOT to use bridge br-bat.666

With that configuration above I can get DHCP on those interfaces and I can create my mesh, however this is where the OP comes in. Which bridge do you think I have made a mistake on?

Full /etc/config/network
root@wavlink:~# cat /etc/config/network

config interface 'loopback'
	option device 'lo'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'

config globals 'globals'
	option packet_steering '1'
	option ula_prefix 'fdaa:6397:dc42::/48'

config device
	option name 'br-lan'
	option type 'bridge'
	option ipv6 '0'
	option mtu '2304'
	option bridge_empty '1'
	list ports 'bat0.1'
	list ports 'lan1'
	list ports 'lan2'
	list ports 'lan4'

config interface 'lan'
	option proto 'static'
	option netmask '255.255.255.0'
	option gateway '192.168.23.1'
	option ipaddr '192.168.23.4'
	list dns '192.168.23.1'
	option delegate '0'
	option device 'br-bat.1'

config interface 'MGMT'
	option proto 'static'
	option device 'lan3'
	option netmask '255.255.255.0'
	option delegate '0'
	option ipaddr '192.168.1.4'

config bridge-vlan
	option device 'br-lan'
	option vlan '1'
	option mtu '2304'
	list ports 'bat0.1'
	list ports 'lan1:u*'
	list ports 'lan2'
	list ports 'lan4'

config bridge-vlan
	option device 'br-lan'
	option vlan '666'
	option mtu '2304'
	list ports 'bat0.1:t'
	list ports 'lan1:t'

config interface 'IOT'
	option proto 'dhcp'
	option device 'br-bat.666'

config interface 'bat0'
	option proto 'batadv'
	option hop_penalty '30'
	option isolation_mark '0x00000000/0x00000000'
	option log_level '0'
	option multicast_fanout '16'
	option orig_interval '1000'
	option gw_mode 'off'
	option delegate '0'
	option routing_algo 'BATMAN_IV'
	option aggregated_ogms '1'
	option bonding '1'
	option bridge_loop_avoidance '1'

config device
	option name 'bat0'
	option ipv6 '0'

config device
	option type '8021q'
	option ifname 'bat0'
	option vid '666'
	option name 'bat0.666'
	option ipv6 '0'

config device
	option type 'bridge'
	option name 'br-bat.666'
	list ports 'bat0.666'
	list ports 'br-lan.666'
	option ipv6 '0'
	option stp '1'

config device
	option type '8021q'
	option ifname 'bat0'
	option vid '1'
	option name 'bat0.1'
	option ipv6 '0'

config device
	option type 'bridge'
	option name 'br-bat.1'
	list ports 'bat0.1'
	list ports 'br-lan.1'
	option ipv6 '0'
	option stp '1'

config interface 'MESH'
	option proto 'batadv_hardif'
	option master 'bat0'
	option delegate '0'
	option mtu '2304'
	option throughput_override '0'

config device
	option name 'wlan0'
	option mtu '1532'
	option ipv6 '0'

config device
	option name 'wlan1'
	option mtu '1532'
	option ipv6 '0'

config device
	option name 'br-lan.666'
	option type '8021q'
	option ifname 'br-lan'
	option vid '666'
	option ipv6 '0'

config device
	option name 'br-lan.1'
	option type '8021q'
	option ifname 'br-lan'
	option vid '1'
	option ipv6 '0'

Your second bridge, br-bat: you don't need it. (But can you explain why you think you need it?)

As a reference parts of my config (I use as much as possible default values, because I have not found yet a reason to "tune" batman-adv in a home setup). The device is a TP LINK Archer C7v5 so non-DSA.

config switch                                                          
    option  name            'switch0'                                  
    option  reset           '1'                                        
    option  enable_vlan     '1'

config device
    option  name            'bat0'
    option  macaddr         '02:00:01:00:00:01'

config interface            'bat0'
    option  proto           'batadv'
    option  routing_algo    'BATMAN_IV'

config interface            'bat0_hardif_mesh0'
    option  proto           'batadv_hardif'
    option  master          'bat0'
    option  mtu             '2304'

config interface            'bat0_hardif_mesh1'
    option  proto           'batadv_hardif'
    option  master          'bat0'
    option  mtu             '2304'

# VLAN 16/0x10: net.mgmt             
config switch_vlan                     
    option  device          'switch0'  
    option  ports           '2t 0t'            
    option  vlan            '16'         
                                               
config device                                  
    option  name            'br-vlan16'        
    option  type            'bridge'           
    list    ports           'eth0.16'          
    list    ports           'bat0.16'          
    option  macaddr         '02:00:01:01:00:10'
                                             
config interface            'vlan16'         
    option  device          'br-vlan16'                                
    option  bridge_empty    '1'                
    option  igmp_snooping   '1'              
    option  proto           'static'         
    option  ipaddr          '192.168.16.1/24'
    list    ip6ifaceid      '::1'              
    list    ip6ifaceid      'eui64'
    option  ip6assign       '64'  
    option  ip6hint         '10'

# VLAN 65/0x41: user.clients                                           
config switch_vlan                                                     
    option  device          'switch0'                                  
    option  ports           '2t 0t'                                    
    option  vlan            '65'                                       
                                                                       
config device                                                          
    option  name            'br-vlan65'                                
    option  type            'bridge'                                   
    list    ports           'eth0.65'                                  
    list    ports           'bat0.65'                                  
    option  macaddr         '02:00:01:01:00:41'                        
                                                                       
config interface            'vlan65'                                   
    option  device          'br-vlan65'                                
    option  bridge_empty    '1'                                        
    option  igmp_snooping   '1'                                        
    option  proto           'static'                                   
    option  ipaddr          '192.168.65.1/24'                          
    list    ip6ifaceid      '::1'                                      
    list    ip6ifaceid      'eui64'                                    
    option  ip6assign       '64'                                       
    option  ip6hint         '41'

Edit: On this device I have no "access ports" where users can connect directly, only a trunk (:2t). But if you want/need to have an access port, just add your port (untagged) to the VLAN bridge.