ipq40XX: low performance when sending traffic?

"Reverting" https://github.com/openwrt/openwrt/commit/9da2b567605b0964d921b9ca4f0c9886db4f636d#diff-043b5b3b3004fa40ab2e96f33c6be34d6dd190b4b281ad40e320a2cf40504b74 fixes everything. o.O

You cannot simply do a "revert", so here is a patch:

diff --git a/target/linux/ipq40xx/files-5.4/drivers/net/ethernet/qualcomm/essedma/edma_axi.c b/target/linux/ipq40xx/files-5.4/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
index 50335b0d14..96a82b3116 100644
--- a/target/linux/ipq40xx/files-5.4/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+++ b/target/linux/ipq40xx/files-5.4/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
@@ -968,6 +968,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 		edma_netdev[i]->netdev_ops = &edma_axi_netdev_ops;
 		edma_netdev[i]->max_mtu = 9000;
 		edma_netdev[i]->features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM
+				      | NETIF_F_HW_VLAN_CTAG_TX
 				      | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_SG |
 				      NETIF_F_TSO | NETIF_F_GRO;
 		edma_netdev[i]->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
@@ -979,10 +980,10 @@ static int edma_axi_probe(struct platform_device *pdev)
 					     NETIF_F_TSO | NETIF_F_GRO;
 
 #ifdef CONFIG_RFS_ACCEL
-		edma_netdev[i]->features |=  NETIF_F_NTUPLE;
-		edma_netdev[i]->hw_features |=  NETIF_F_NTUPLE;
-		edma_netdev[i]->vlan_features |= NETIF_F_NTUPLE;
-		edma_netdev[i]->wanted_features |= NETIF_F_NTUPLE;
+		edma_netdev[i]->features |=  NETIF_F_RXHASH | NETIF_F_NTUPLE;
+		edma_netdev[i]->hw_features |=  NETIF_F_RXHASH | NETIF_F_NTUPLE;
+		edma_netdev[i]->vlan_features |= NETIF_F_RXHASH | NETIF_F_NTUPLE;
+		edma_netdev[i]->wanted_features |= NETIF_F_RXHASH | NETIF_F_NTUPLE;
 #endif
 		edma_set_ethtool_ops(edma_netdev[i]);
 
diff --git a/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.c b/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.c
index db21547a03..fab8323d2c 100644
--- a/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.c
+++ b/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.c
@@ -1200,11 +1200,7 @@ ar40xx_init_port(struct ar40xx_priv *priv, int port)
 	ar40xx_rmw(priv, AR40XX_REG_PORT_STATUS(port),
 			AR40XX_PORT_AUTO_LINK_EN, 0);
 
-	/* CPU port is setting headers to limit output ports */
-	if (port == 0)
-		ar40xx_write(priv, AR40XX_REG_PORT_HEADER(port), 0x8);
-	else
-		ar40xx_write(priv, AR40XX_REG_PORT_HEADER(port), 0);
+	ar40xx_write(priv, AR40XX_REG_PORT_HEADER(port), 0);
 
 	ar40xx_write(priv, AR40XX_REG_PORT_VLAN0(port), 0);
 
@@ -1247,10 +1243,6 @@ ar40xx_init_globals(struct ar40xx_priv *priv)
 	t = (AR40XX_PORT0_FC_THRESH_ON_DFLT << 16) |
 	      AR40XX_PORT0_FC_THRESH_OFF_DFLT;
 	ar40xx_write(priv, AR40XX_REG_PORT_FLOWCTRL_THRESH(0), t);
-
-	/* set service tag to 802.1q */
-	t = ETH_P_8021Q | AR40XX_ESS_SERVICE_TAG_STAG;
-	ar40xx_write(priv, AR40XX_ESS_SERVICE_TAG, t);
 }
 
 static void
@@ -1576,11 +1568,7 @@ ar40xx_setup_port(struct ar40xx_priv *priv, int port, u32 members)
 	u32 pvid = priv->vlan_id[priv->pvid[port]];
 
 	if (priv->vlan) {
-		if (priv->vlan_tagged & BIT(port))
-			egress = AR40XX_PORT_VLAN1_OUT_MODE_TAG;
-		else
-			egress = AR40XX_PORT_VLAN1_OUT_MODE_UNMOD;
-
+		egress = AR40XX_PORT_VLAN1_OUT_MODE_UNMOD;
 		ingress = AR40XX_IN_SECURE;
 	} else {
 		egress = AR40XX_PORT_VLAN1_OUT_MODE_UNTOUCH;
@@ -1591,17 +1579,8 @@ ar40xx_setup_port(struct ar40xx_priv *priv, int port, u32 members)
 	t |= pvid << AR40XX_PORT_VLAN0_DEF_CVID_S;
 	ar40xx_write(priv, AR40XX_REG_PORT_VLAN0(port), t);
 
-	t = egress << AR40XX_PORT_VLAN1_OUT_MODE_S;
-
-	/* set CPU port to core port */
-	if (port == 0)
-		t |= AR40XX_PORT_VLAN1_CORE_PORT;
-
-	if (priv->vlan_tagged & BIT(port))
-		t |= AR40XX_PORT_VLAN1_PORT_VLAN_PROP;
-	else
-		t |= AR40XX_PORT_VLAN1_PORT_TLS_MODE;
-
+	t = AR40XX_PORT_VLAN1_PORT_VLAN_PROP;
+	t |= egress << AR40XX_PORT_VLAN1_OUT_MODE_S;
 	ar40xx_write(priv, AR40XX_REG_PORT_VLAN1(port), t);
 
 	t = members;
@@ -2042,12 +2021,6 @@ static int ar40xx_probe(struct platform_device *pdev)
 	/* register switch */
 	swdev = &priv->dev;
 
-	if (priv->mii_bus == NULL) {
-		dev_err(&pdev->dev, "Probe failed - Missing PHYs!\n");
-		ret = -ENODEV;
-		goto err_missing_phy;
-	}
-
 	swdev->alias = dev_name(&priv->mii_bus->dev);
 
 	swdev->cpu_port = AR40XX_PORT_CPU;
@@ -2079,7 +2052,6 @@ err_unregister_switch:
 	unregister_switch(&priv->dev);
 err_unregister_phy:
 	phy_driver_unregister(&ar40xx_phy_driver);
-err_missing_phy:
 	platform_set_drvdata(pdev, NULL);
 	return ret;
 }
diff --git a/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.h b/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.h
index 7ba40ccf75..722bf6ae4b 100644
--- a/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.h
+++ b/target/linux/ipq40xx/files-5.4/drivers/net/phy/ar40xx.h
@@ -151,9 +151,6 @@ struct ar40xx_mib_desc {
 #define   AR40XX_MIB_FUNC_NO_OP		0x0
 #define   AR40XX_MIB_FUNC_FLUSH		0x1
 
-#define AR40XX_ESS_SERVICE_TAG		0x48
-#define AR40XX_ESS_SERVICE_TAG_STAG	BIT(17)
-
 #define AR40XX_REG_PORT_STATUS(_i)		(0x07c + (_i) * 4)
 #define   AR40XX_PORT_SPEED			BITS(0, 2)
 #define   AR40XX_PORT_STATUS_SPEED_S	0
@@ -182,8 +179,6 @@ struct ar40xx_mib_desc {
 #define   AR40XX_PORT_VLAN0_DEF_CVID_S		16
 
 #define AR40XX_REG_PORT_VLAN1(_i)		(0x424 + (_i) * 0x8)
-#define   AR40XX_PORT_VLAN1_CORE_PORT		BIT(9)
-#define   AR40XX_PORT_VLAN1_PORT_TLS_MODE	BIT(7)
 #define   AR40XX_PORT_VLAN1_PORT_VLAN_PROP	BIT(6)
 #define   AR40XX_PORT_VLAN1_OUT_MODE		BITS(12, 2)
 #define   AR40XX_PORT_VLAN1_OUT_MODE_S		12