Vlans across multiple openwrt devices - getting dropouts

Hi,
I moved to a new apartment with preexisting ethernet cabling, and decided to reorganize my network, which consists of 2 openwrt devices, in the following way:

  1. main router is the ax3000t, situated near the fiber optics point, and near a single ethernet outlet
  2. secondary router is the x86 machine (running proxmox and openwrt as VM)

I have 4 VLANs:

  1. LAN
  2. Guest
  3. IoT Offline (wireless only)
  4. IoT Online (wireless only)

the ax3000t has 4 ports, with intended use as follows:

  1. WAN
  2. trunk to x86
  3. LAN
  4. Guest VLAN

the x86 machine has 4 ports, with intended use as follows:

  1. management port (for now)
  2. port for trunk from ax3000t
  3. port for LAN
  4. port for Guest VLAN

this is my attempted configuration (stp and priority were added later trying to mitigate the issue):

ax3000t:

root@OpenWrt:~# cat /etc/config/network

config interface 'loopback'
        option device 'lo'
        option proto 'static'
        option ipaddr '127.0.0.1'
        option netmask '255.0.0.0'

config globals 'globals'
        option ula_prefix 'fd2e:bd80:3fd4::/48'
        option packet_steering '1'

config device
        option name 'br-lan'
        option type 'bridge'
        list ports 'lan2'
        list ports 'lan3'
        list ports 'lan4'
        option stp '1'
        option bridge_empty '1'
        option priority '4096'

config interface 'lan'
        option device 'br-lan.1'
        option proto 'static'
        option ipaddr '192.168.1.1'
        option netmask '255.255.255.0'
        option ip6assign '60'
        list dns '8.8.8.8'
        list dns '8.8.4.4'
        list dns '1.1.1.1'

config device
        option name 'wan'
        option macaddr '**:**:**:**:**:**'

config interface 'wan'
        option device 'wan'
        option proto 'dhcp'

config interface 'wan6'
        option device 'wan'
        option proto 'dhcpv6'
        option reqaddress 'try'
        option reqprefix 'auto'

config bridge-vlan
        option device 'br-lan'
        option vlan '1'
        list ports 'lan2:t'
        list ports 'lan3:u*'
        list ports 'lan4:u*'

config bridge-vlan
        option device 'br-lan'
        option vlan '10'
        list ports 'lan2:t'

config interface 'iot_online'
        option device 'br-lan.10'
        option ipaddr '192.168.10.1'
        option netmask '255.255.255.0'
        option proto 'static'

config bridge-vlan
        option device 'br-lan'
        option vlan '11'
        list ports 'lan2:t'

config interface 'iot_offline'
        option device 'br-lan.11'
        option ipaddr '192.168.11.1'
        option netmask '255.255.255.0'
        option proto 'static'

config bridge-vlan
        option device 'br-lan'
        option vlan '12'
        list ports 'lan2:t'

config interface 'guest'
        option device 'br-lan.12'
        option ipaddr '192.168.12.1'
        option netmask '255.255.255.0'
        option proto 'static'

x86:

root@mmd-router:~# cat /etc/config/network

config interface 'loopback'
        option device 'lo'
        option proto 'static'
        option ipaddr '127.0.0.1'
        option netmask '255.0.0.0'

config globals 'globals'
        option ula_prefix 'fd11:fb70:ae50::/48'

config device
        option name 'br-lan'
        option type 'bridge'
        option stp '1'
        option priority '8192'
        option bridge_empty '1'
        list ports 'eth0'
        list ports 'eth1'
        list ports 'eth2'
        list ports 'eth3'

config bridge-vlan
        option device 'br-lan'
        option vlan '1'
        list ports 'eth0:u*'
        list ports 'eth2:u*'
        list ports 'eth1:t'

config bridge-vlan
        option device 'br-lan'
        option vlan '10'
        list ports 'eth1:t'

config bridge-vlan
        option device 'br-lan'
        option vlan '11'
        list ports 'eth1:t'

config bridge-vlan
        option device 'br-lan'
        option vlan '12'
        list ports 'eth1:t'
        list ports 'eth3:u*'

config interface 'lan'
        option proto 'static'
        option device 'br-lan.1'
        option ipaddr '192.168.1.2'
        option netmask '255.255.255.0'
        option gateway '192.168.1.1'
        list dns '192.168.1.1'
        option ip6assign '60'

config interface 'iot_online'
        option proto 'none'
        option device 'br-lan.10'
        option ipaddr '192.168.10.1'
        option netmask '255.255.255.0'

config interface 'iot_offline'
        option proto 'none'
        option device 'br-lan.11'
        option ipaddr '192.168.11.1'
        option netmask '255.255.255.0'

config interface 'guest'
        option proto 'none'
        option device 'br-lan.12'

what works: when the x86 is connected alone to the ax3000t, the assigned ports get IPs on the VLANs as configured

what doesn't work: when adding a client to the LAN of the ax3000t (port 3), the client connected to LAN on the x86 (port 3) starts experiencing dropouts in connectivity/dhcp, that last for a few seconds to a few minutes.

can you please help me identify the issue?

p.s. here's my attempt at a diagram of the topology:

Your current configuration does not match this description.

Are you trying to use this x86 device as a managed switch? This is not a recommended approach, especially when running a virtualized router. If you're running bare metal, that's fine, although still not optimal. But when virtualized, it is actually highly dependent on the correct configuration of the supervisor/hypervisor and host OS.

You should remove the last 3 lines from this bridge. They're not going to do anything useful here.

And likewise, remove the stp, priority, and bridge_empty lines from the x86 box:

I highly suspect that it is one of two issues: STP and/or the configuration of the virtualized router (at all layers).

What is the true purpose of the x86 device? Why do you have 2 routers?

1 Like

can you please explain what is not matching?

I think I do. ultimately, the ax3000t can't be moved to where the x86 is, and the x86 has the needed ports to allow the physical connections to the VLANs needed.
regarding the x86 and virtualization: I passed through the PCIs matching eth1, eth2, eth3 to the openwrt VM, and the "management" PCI NIC is bridged between the host and all other VMs - it's eth0 on openwrt

did it, still getting dropouts unfortunately

btw, I only added it after the issue surfaced

it's a capable machine, it runs Home Assistant OS, general purpose Debian, and OpenWRT.
the motivation for running OpenWRT on it, is to allow selective "forwarding" of VLANs to other rooms (in the future I will reuse the "management port" for this purpose, after the console/serial/rj45 adapter I ordered arrives), since it's located in a room with a "network distribution panel" that reaches other rooms.

Sure... based on the config:

config bridge-vlan
        option device 'br-lan'
        option vlan '1'
        list ports 'lan2:t'
        list ports 'lan3:u*'
        list ports 'lan4:u*'

config bridge-vlan
        option device 'br-lan'
        option vlan '10'
        list ports 'lan2:t'

config bridge-vlan
        option device 'br-lan'
        option vlan '11'
        list ports 'lan2:t'
...

config bridge-vlan
        option device 'br-lan'
        option vlan '12'
        list ports 'lan2:t'

This translates to:

  • lan2: trunk, all VLANs tagged (1, 10, 11, 12)
  • lan3: VLAN 1 (lan) untagged + PVID
  • lan4: VLAN 1 (lan) untagged + PVID

This doesn't align with your stated goals:

It really only differs in that port lan4 is the standard lan, not the guest network (VLAN 12).

What about swapping the locations of the x86 box and the ax3000t?

But the preferred solution is simple -- use a device that is designed to be used as a switch (the x86 box is not designed for this, especially when considering you are not running bare metal). Appropriate options for this purpose are a managed switch, or another router with OpenWrt that has a built-in switch.

The issue here isn't that the machine isn't "capable", but rather that:

  • you don't need another router on your network, you need a switch
  • OpenWrt when running bare-metal can be configured to be used as a switch
  • But virtualized OpenWrt is a beast of another nature and there are strong dependencies (as I mentioned before) with the virtual environment and the host OS.

I understand. it is clear that the straightforward path is a dedicated device.
still, I'd like to understand what's broken in this scenario.

it seems that you don't identify any issues in the network config, correct?
could it be something in the DHCP configuration? anything else I should check?

p.s. this is the proxmox bridge configuration (probably out of scope, but as a reference):

auto enp2s0
iface enp2s0 inet manual

auto vmbr0
iface vmbr0 inet static
  address 192.168.1.182
  netmask 255.255.255.0
  gateway 192.168.1.1
  bridge-ports enp2s0
  bridge-stp off
  bridge-fd 0

Yes, out of scope.

Almost certainly proxmox and/or the host OS.

Other than the things I pointed out (removing the STP, priority, and bridge empty lines; that the ax3000t wasn't actually configured to your goals), there are other things that are not quite right about the x86 config...

Your unmanaged interfaces should not have any addresses, and you can actually try bridges with direct dotted notation rather than DSA/bridge-vlan syntax. I don't think these will solve the problem, but you can try the following:

Edit br-lan like this:

config device
        option name 'br-lan'
        option type 'bridge'
        list ports 'eth0'
        list ports 'eth1.1'
        list ports 'eth2'

Create new bridges for the 3 other VLANs:

config device
        option name 'br-iot-on'
        option type 'bridge'
        list ports 'eth1.10'

config device
        option name 'br-iot-off'
        option type 'bridge'
        list ports 'eth1.11'

config device
        option name 'br-guest'
        option type 'bridge'
        list ports 'eth1.12'
        list ports 'eth3'

Delete the bridge-vlans:

Edit the networks to use the new bridges:

config interface 'lan'
        option proto 'static'
        option device 'br-lan'
        option ipaddr '192.168.1.2'
        option netmask '255.255.255.0'
        option gateway '192.168.1.1'
        list dns '192.168.1.1'
        option ip6assign '60'

config interface 'iot_online'
        option proto 'none'
        option device 'br-iot-on'

config interface 'iot_offline'
        option proto 'none'
        option device 'br-iot-off'

config interface 'guest'
        option proto 'none'
        option device 'br-guest'

Then restart and test again. If this doesn't fix it, the problem is the VM situation.

1 Like

did a quick test, it's looking better.
I'll let it run for a while and update back

after a few days of further testing, I no longer see the issue.
I'm actually stumped as to why the dotted notation, or removal of IP definitions for the vlans would solve it.
just when I thought I had grasped the simple vlans configuration..... I'll be more than happy to learn.

spoke too soon, after a power cycle of the main router, the problem resurfaced

it seems that disabling IPv6 on the client that's on the LAN port (eth2) of the x86 helps with the issue.
to clarify, the client I was having issues with, is a Windows PC (on LAN), but another Windows PC that's connected to the guest VLAN on port eth3, didn't show any issues.
furthermore, the VMs that were sitting on the bridge vmbr0 (which is eth0 on the x86 openwrt), didn't show any issues.
on the openwrt page I think I saw (can't remember for sure right now) two IPv6 addresses for the problematic Windows PC.

once I disabled IPv6 on this client, I didn't see the dropouts anymore.

to recap, the x86 configuration is:

/etc/config/network

...
config device
        option name 'br-lan'
        option type 'bridge'
        list ports 'eth0'
        list ports 'eth1.1'
        list ports 'eth2'
...
config interface 'lan'
        option proto 'static'
        option device 'br-lan'
        option ipaddr '192.168.1.2'
        option netmask '255.255.255.0'
        option gateway '192.168.1.1'
        list dns '192.168.1.1'
        option ip6assign '60'
/etc/config/dhcp

config dnsmasq
        option domainneeded '1'
        option rebind_protection '0'
        option domain 'lan'
        option cachesize '1000'
        option readethers '1'
        option leasefile '/tmp/dhcp.leases'
        option resolvfile '/tmp/resolv.conf.d/resolv.conf.auto'
        option localservice '0'
        option ednspacket_max '1232'
        option allservers '1'
        option logqueries '1'
        option boguspriv '0'

config dhcp 'lan'
        option interface 'lan'
        option ignore '1'

config dhcp 'iot_online'
        option interface 'iot_online'
        option ignore '1'

config dhcp 'iot_offline'
        option interface 'iot_offline'
        option ignore '1'

config dhcp 'guest'
        option interface 'guest'
        option ignore '1'

config dhcp 'mgmt'
        option interface 'mgmt'
        option ignore '1'

config dhcp 'wan'
        option interface 'wan'
        option ignore '1'

config odhcpd 'odhcpd'
        option maindhcp '0'
        option leasefile '/tmp/hosts/odhcpd'
        option leasetrigger '/usr/sbin/odhcpd-update'
        option loglevel '4'

I guess I should remove the entries for non existing interfaces mgmt and wan.

is there some configuration I forgot regarding IPv6 that could cause this?

This is Spaghetti Networking and I am being kind here.

You only need one Router.

Invest in managed Layer 2 Switches.

I suspect that this, like the other 'fixes' is going to be short lived.

Remember what I said...

Understood. I'll look into replacing it with a minimal 4-5 port openwrt device with a built-in switch at the nearest opportunity

OpenWrt does not have all the answers; understand its strengths and weaknesses and you will get things done quicker.