VXLAN MTU problem

Hello,
my idea is to use VXLAN to create a network segment via 3 routers that are connected via lan and then make this segment available on all these routers via WiFi.
To do this I would like to set up VXLAN with a multicast address.

However, my problem is that when I activate a DHCP server on the alias VXLAN interface with the VXLAN internal IP address, no client receives an address and no requests are received.

What am I doing wrong?

/etc/config/network

config interface 'vxlan15'
	option proto 'vxlan'
	option peeraddr '239.1.1.15'
	option vid '15'
	option tunlink 'lan'

config interface 'v15'
	option proto 'static'
	option delegate '0'
	option ipv6 '0'
	option ipaddr '192.168.15.5'
	option netmask '255.255.255.0'
	option mtu '2048'
	option device '@vxlan15'

/etc/config/wireless

config wifi-iface 'wifinet1'
	option device 'radio0'
	option mode 'ap'
	option ssid 'OpenWrt_v15'
	option encryption 'psk2'
	option key 'xxxx'
	option network 'v15'

ok, i got it working so far, but now i have another problem.
My client is connected via wifi to a different AP.
There is a route configured for 192.168.251.0/24 with GW: 192.168.251.5

Because of MTU issues, I have problems accessing the IoT devices (HTTP) that are connected to the WiFi network bound to the VLAN which is built over VXLAN.

If I ping the vxlan or vlan.1 interface of router .4 from my workstation, I can go up to 1452 bytes:
client -> wireless -> router_1 -> ethernet -> router_2 -> vxlan / vlan.1

ping -M do 192.168.251.4 -s 1424
PING 192.168.251.4 (192.168.251.4) 1424(1452) bytes of data.
1432 bytes from 192.168.251.4: icmp_seq=1 ttl=64 time=4.24 ms

Beyond that size the packets get dropped (no response).

If I try to ping the vxlan or vlan.1 interface of router .4 from router .5, packets with a payload of up to 1422 bytes work.
router_1 -> ethernet -> router_2 -> vxlan / vlan.1

root@ar300m_2:~# ping 192.168.251.4 -s 1422
PING 192.168.251.4 (192.168.251.4): 1422 data bytes
1430 bytes from 192.168.251.4: seq=0 ttl=64 time=1.425 ms

If I set the MTU of my client's WiFi interface to a lower value, I can reach the web interfaces of all IoT devices.

Any idea how to fix that problem ?

Router .x.5

config interface 'loopback'
	option device 'lo'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'

config globals 'globals'
	option ula_prefix 'fd2e:3ae1:9ce6::/48'
	option packet_steering '1'

config device
	option name 'br-lan'
	option type 'bridge'
	list ports 'eth0'
	list ports 'vxlan'
	option stp '1'
	option igmp_snooping '1'
	option promisc '1'
	option bridge_empty '1'
	option ipv6 '0'
	option mtu '2048'

config interface 'lan'
	option device 'br-lan'
	option proto 'static'
	option ipaddr '192.168.178.5'
	option netmask '255.255.255.0'
	option ip6assign '60'
	list dns '192.168.178.20'
	option gateway '192.168.178.1'

config interface 'manage'
	option proto 'static'
	option device 'phy0-ap0'
	option ipaddr '192.168.2.1'
	option netmask '255.255.255.0'

config device
	option type 'bridge'
	option name 'br-vlan1'
	option stp '1'
	option igmp_snooping '1'
	option promisc '1'
	option bridge_empty '1'
	list ports 'vxlan.1'
	option ipv6 '0'

config interface 'vlan1'
	option device 'br-vlan1'
	option proto 'static'
	option ipaddr '192.168.251.5'
	option netmask '255.255.255.0'

config interface 'vxlink'
	option proto 'static'
	option delegate '0'
	option ipv6 '0'
	option ipaddr '192.168.250.5'
	option netmask '255.255.255.0'
	option device '@vxlan'
	option type 'bridge'

config interface 'vxlan'
	option proto 'vxlan'
	option peeraddr '239.1.1.250'
	option vid '250'
	option peerdns '0'
	option delegate '0'
	option tunlink 'lan'

config device
	option type '8021q'
	option ifname 'vxlan'
	option vid '1'
	option name 'vxlan.1'
	option ipv6 '0'

config device
	option name 'eth0'
	option ipv6 '0'
	option mtu '2048'

config device
	option name 'eth1'
	option ipv6 '0'
	option mtu '2048'

config device
	option name 'vxlan'
	option ipv6 '0'

Router .x.4

config interface 'loopback'
	option proto 'static'
	option ipaddr '127.0.0.1'
	option netmask '255.0.0.0'
	option device 'lo'

config globals 'globals'
	option packet_steering '1'

config interface 'lan'
	option proto 'static'
	option gateway '192.168.178.1'
	option delegate '0'
	option device 'br-lan'
	list ipaddr '192.168.178.4/24'
	list dns '192.168.178.20'

config switch
	option name 'switch0'
	option reset '1'
	option enable_vlan '1'

config switch_vlan
	option device 'switch0'
	option vlan '1'
	option vid '1'
	option ports '0 6 2 3 4 5 1'

config interface 'manage'
	option proto 'static'
	option ipaddr '192.168.1.1'
	option netmask '255.255.255.0'
	option delegate '0'

config device
	option name 'br-lan'
	option type 'bridge'
	list ports 'eth1'
	list ports 'vxlan'
	option stp '1'
	option igmp_snooping '1'
	option promisc '1'
	option bridge_empty '1'
	option mtu '2048'
	option ipv6 '0'

config interface 'tasmo'
	option proto 'static'
	option ipaddr '192.168.13.1'
	option netmask '255.255.255.0'
	option disabled '1'

config interface 'iot'
	option proto 'static'
	option ipaddr '192.168.14.1'
	option netmask '255.255.255.0'
	option device 'phy1-ap2'

config interface 'wg0'
	option proto 'wireguard'
	option private_key 'xx'
	option force_link '1'
	list addresses '192.168.20.2/24'
	option disabled '1'

config wireguard_wg0
	option public_key 'xx='
	option description 'vds'
	option persistent_keepalive '25'
	option endpoint_port '51820'
	list allowed_ips '192.168.20.1/32'
	option route_allowed_ips '1'
	option endpoint_host 'vds'

config device
	option type 'bridge'
	option name 'br-vlan1'
	option stp '1'
	option igmp_snooping '1'
	option promisc '1'
	option bridge_empty '1'
	list ports 'vxlan.1'
	option ipv6 '0'

config interface 'vlan1'
	option device 'br-vlan1'
	option proto 'static'
	option ipaddr '192.168.251.4'
	option netmask '255.255.255.0'

config interface 'vxlink'
	option proto 'static'
	option delegate '0'
	option ipv6 '0'
	option ipaddr '192.168.250.4'
	option netmask '255.255.255.0'
	option device '@vxlan'
	option type 'bridge'

config interface 'vxlan'
	option proto 'vxlan'
	option peeraddr '239.1.1.250'
	option vid '250'
	option peerdns '0'
	option delegate '0'
	option tunlink 'lan'

config device
	option type '8021q'
	option ifname 'vxlan'
	option vid '1'
	option name 'vxlan.1'
	option ipv6 '0'

config device
	option name 'eth1'
	option mtu '2048'
	option ipv6 '0'

config device
	option name 'eth0'
	option mtu '2048'
	option ipv6 '0'

config device
	option name 'vxlan'

Yes, the MTU on the VXLAN backbone needs to be large enough to hold the encapsulated packets and frames.
So either your local network uses a lower MTU, or the VXLAN MTU is adjusted.
Did you check the logs to confirm that you are actually able to set 2048?

Where can I check that, and what would be the right strategy?

Whatever I tested, e.g. setting bridge / eth to 2048 or lowering the VLAN MTU, nothing helped.

If the MTU is too large, it will be stated in the log; check it with logread right after you have brought up the interface.
With ip -d link you can also check the maxmtu of a device.

But where do I start?
The vxlan interface? The bridge with vxlan and eth assigned?
Or the WiFi / VLAN?

My last test was to increase the vxlan interface MTU to 2048. No change.
Then I also set the bridge and eth0 to 2048. No change.

Whatever I did, I was not able to increase the ping size and send a full 1500-byte ping.

After I changed the MTU on the bridge/eth interface,
I no longer get warnings about the MTU size, but packets larger than 1429 bytes are still being dropped.

before:

ping -M do 192.168.250.4 -s 1429
PING 192.168.250.4 (192.168.250.4) 1429(1457) bytes of data.
ping: local error: message too long, mtu=1450

after:

ping -M do 192.168.250.4 -s 1429
PING 192.168.250.4 (192.168.250.4) 1429(1457) bytes of data.
From 192.168.178.1: icmp_seq=1 Redirect Host(New nexthop: 192.168.178.5)                     
--- 192.168.250.4 ping statistics ---
7 packets transmitted, 0 received, 100% packet loss, time 6111ms

AFAIK, VXLAN needs 90 bytes extra in total.

So if you want to stay with a default MTU of 1500 on the LAN (any local network), your VXLAN underlay needs an MTU of at least 1590 bytes.

See for instance https://packetpushers.net/blog/vxlan-udp-ip-ethernet-bandwidth-overheads/

Yes, and I have set the MTU of eth / br to 2048.
But pings to the vxlan alias that was in this bridge were still dropped from 1429 bytes upwards.

That's what I can't explain right now.

When I did this, the mtu of the vxlan interface was automatically set to 1998 bytes. All the interfaces below it (vlan) had 1500 bytes

What else can cause the packets to be dropped?

But have you ensured that you can reach all VTEPs and that a VXLAN-encapsulated packet actually goes over the wire?

Yes, up to a certain size I was able to ping the alias (VTEP) interfaces.
And I was also able to ping the VLAN addresses.

So you have a VXLAN on top of a direct connection between a router and a (dumb) AP? Why?

Here https://vincent.bernat.ch/en/blog/2017-vxlan-linux you will find useful tips on how to debug and set up VXLAN on Linux. I would check and debug first, and also try setting it up manually.

It's because of a router in between which is not able to handle VLANs.

Did you check the path MTU? Only indirectly. Does your router not support a larger MTU on the two interfaces facing the router and the AP?

I have now connected 2 openwrt routers directly via a stupid Netgear switch for testing and configured the vxlan.
vxlan and alias interface are not in any firewall zone and the global zone setting is set to accept, accept, accept.

Without any configuration, tracepath reports a PMTU of 1450.

Again, after increasing the MTU of the bridge, eth and vxlan interfaces,
all packets get dropped if they are bigger.

root@OpenWrt:~# tracepath -b -m 2 192.168.250.2                                                                                                                                                               
 1?: [LOCALHOST]                      pmtu 1998                                                                                                                                                               
 1:  no reply                                                                                                                                                                                                 
 2:  no reply                                                                                                                                                                                                 
     Too many hops: pmtu 1998                                                                                                                                                                                 
     Resume: pmtu 1998

That Netgear switch is probably incapable of switching jumbo frames.

Just use a direct connection and use a network for testing which is currently not in use.

Same with a Zyxel GS1200-5, which supports jumbo frames.

I have also ordered a crossover (crosslink) adapter.

What I find absolutely incomprehensible is that when I set the mtu correctly, I get the following:

ping -M do 192.168.251.2 -s 2000
PING 192.168.251.2 (192.168.251.2) 2000(2028) bytes of data.
ping: local error: message too long, mtu=1500
ping: local error: message too long, mtu=1500

but everything between 1450 and 1500 bytes gets dropped.

ping -M do 192.168.251.2 -s 1472 -c 3
PING 192.168.251.2 (192.168.251.2) 1472(1500) bytes of data.
From 192.168.178.1: icmp_seq=1 Redirect Host(New nexthop: 192.168.178.6)

--- 192.168.251.2 ping statistics ---
3 packets transmitted, 0 received, 100% packet loss, time 2027ms

With 1450 bytes or less it works.

ping -M do 192.168.251.2 -s 1422 -c 3
PING 192.168.251.2 (192.168.251.2) 1422(1450) bytes of data.
1430 bytes from 192.168.251.2: icmp_seq=1 ttl=64 time=3.63 ms
1430 bytes from 192.168.251.2: icmp_seq=2 ttl=64 time=3.29 ms
1430 bytes from 192.168.251.2: icmp_seq=3 ttl=64 time=3.16 ms

--- 192.168.251.2 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2002ms
rtt min/avg/max/mdev = 3.155/3.355/3.626/0.198 ms

Damn, it could be that I found the problem.
The MTU shown in the web interface does not reflect reality.

root@OpenWrt:~# ip link set dev eth0 mtu 1554
Error: mtu greater than device maximum.

I. told. you. so. In my first reply to you!

And again, check the max MTU with ip -d link.

Sorry, I thought the GUI would display the 'real' values.

I assume maxmtu is a hardware / driver limitation?