Constant ping spikes

I used to get very good bufferbloat results on this connection, but since I came back from a trip I have been experiencing frequent ping spikes (600 ms). They occur all the time, and it's impossible to play any game online.
Below are the debugs.

cat /etc/config/sqm
config queue 'eth1'
        option debug_logging '0'
        option verbosity '5'
        option qdisc 'cake'
        option linklayer 'atm'
        option overhead '48'
        option qdisc_advanced '1'
        option squash_dscp '1'
        option squash_ingress '1'
        option ingress_ecn 'ECN'
        option egress_ecn 'NOECN'
        option qdisc_really_really_advanced '1'
        option enabled '1'
        option interface 'pppoe-wan'
        option upload '400'
        option download '9000'
        option script 'piece_of_cake.qos'
        option iqdisc_opts 'nat dual-dsthost ingress'
        option eqdisc_opts 'nat dual-srchost ack-filter'
ifstatus wan
{
        "up": true,
        "pending": false,
        "available": true,
        "autostart": true,
        "dynamic": false,
        "uptime": 75047,
        "l3_device": "pppoe-wan",
        "proto": "pppoe",
        "device": "eth0",
        "updated": [
                "addresses",
                "routes"
        ],
        "metric": 0,
        "dns_metric": 0,
        "delegation": false,
        "ipv4-address": [
                {
                        "address": "177.177.208.9",
                        "mask": 32,
                        "ptpaddress": "201.79.136.1"
                }
        ],
        "ipv6-address": [

        ],
        "ipv6-prefix": [

        ],
        "ipv6-prefix-assignment": [

        ],
        "route": [
                {
                        "target": "0.0.0.0",
                        "mask": 0,
                        "nexthop": "201.79.136.1",
                        "source": "0.0.0.0/0"
                }
        ],
        "dns-server": [
                "1.1.1.1",
                "1.0.0.1",
                "8.8.4.4"
        ],
        "dns-search": [

        ],
        "neighbors": [

        ],
        "inactive": {
                "ipv4-address": [

                ],
                "ipv6-address": [

                ],
                "route": [

                ],
                "dns-server": [
                        "200.222.145.84",
                        "200.222.122.180"
                ],
                "dns-search": [

                ],
                "neighbors": [

                ]
        },
        "data": {

        }
}
tc -s qdisc
qdisc noqueue 0: dev lo root refcnt 2
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
qdisc fq_codel 0: dev eth0 root refcnt 2 limit 10240p flows 1024 quantum 1514 target 5.0ms interval 100.0ms memory_limit 4Mb ecn
 Sent 721350347 bytes 4592626 pkt (dropped 0, overlimits 0 requeues 5)
 backlog 0b 0p requeues 5
  maxpacket 88 drop_overlimit 0 new_flow_count 2 ecn_mark 0
  new_flows_len 0 old_flows_len 0
qdisc fq_codel 0: dev eth1 root refcnt 2 limit 10240p flows 1024 quantum 1514 target 5.0ms interval 100.0ms memory_limit 4Mb ecn
 Sent 214922752 bytes 164065 pkt (dropped 0, overlimits 0 requeues 1)
 backlog 0b 0p requeues 1
  maxpacket 0 drop_overlimit 0 new_flow_count 0 ecn_mark 0
  new_flows_len 0 old_flows_len 0
qdisc noqueue 0: dev br-lan root refcnt 2
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
qdisc noqueue 0: dev eth1.1 root refcnt 2
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
qdisc noqueue 0: dev wlan1 root refcnt 2
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
qdisc cake 8079: dev pppoe-wan root refcnt 2 bandwidth 400Kbit besteffort dual-srchost nat nowash ack-filter split-gso rtt 100.0ms atm overhead 48
 Sent 11124748 bytes 111695 pkt (dropped 15576, overlimits 177638 requeues 0)
 backlog 0b 0p requeues 0
 memory used: 283808b of 4Mb
 capacity estimate: 400Kbit
 min/max network layer size:           28 /    1492
 min/max overhead-adjusted size:      106 /    1749
 average network hdr offset:            0

                  Tin 0
  thresh        400Kbit
  target         45.4ms
  interval      140.4ms
  pk_delay       14.3ms
  av_delay        2.3ms
  sp_delay         93us
  backlog            0b
  pkts           127271
  bytes        12216692
  way_inds        12053
  way_miss         1006
  way_cols            0
  drops             527
  marks               0
  ack_drop        15049
  sp_flows            0
  bk_flows            1
  un_flows            0
  max_len          1492
  quantum           300

qdisc ingress ffff: dev pppoe-wan parent ffff:fff1 ----------------
 Sent 409116600 bytes 304174 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
qdisc noqueue 0: dev wlan1.sta3 root refcnt 2
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
qdisc cake 807a: dev ifb4pppoe-wan root refcnt 2 bandwidth 9Mbit besteffort dual-dsthost nat wash ingress no-ack-filter split-gso rtt 100.0ms atm overhead 48
 Sent 388567387 bytes 289341 pkt (dropped 14833, overlimits 564257 requeues 0)
 backlog 0b 0p requeues 0
 memory used: 214272b of 4Mb
 capacity estimate: 9Mbit
 min/max network layer size:           30 /    1492
 min/max overhead-adjusted size:      106 /    1749
 average network hdr offset:            0

                  Tin 0
  thresh          9Mbit
  target          5.0ms
  interval      100.0ms
  pk_delay       36.2ms
  av_delay       26.9ms
  sp_delay        938us
  backlog            0b
  pkts           304174
  bytes       409116600
  way_inds          123
  way_miss          961
  way_cols            0
  drops           14833
  marks               0
  ack_drop            0
  sp_flows            2
  bk_flows            1
  un_flows            0
  max_len          1492
  quantum           300

This all looks pretty okay, and no indicators of ~600ms delays from the cake statistics.
Could you try:

opkg update
opkg install mtr
mtr -ebz4 8.8.8.8

Then watch mtr's live updates: if, after some run time, packet loss and worst-case RTT show a step-like increase from a specific hop onward, that would indicate overload/congestion upstream in your provider's network, something quite hard for SQM to fix/control.
Maybe run mtr -ezb4w -c 10 8.8.8.8 and post the output here, then we can have a look together...

Ok, so this is the output of: mtr -ezb4w -c 10 8.8.8.8

Start: 2020-03-30T11:07:32+0000
HOST: OpenWrt                                  Loss%   Snt   Last   Avg  Best  Wrst StDev
@Not a TXT record
  1. AS???    ???                              100.0    10    0.0   0.0   0.0   0.0   0.0
@Not a TXT record
  2. AS???    100.122.50.185 (100.122.50.185)   0.0%    10   19.1  75.8  18.4 589.6 180.5
@Not a TXT record
  3. AS???    100.122.22.200 (100.122.22.200)   0.0%    10   31.2  77.3  30.4 493.7 146.3
@Not a TXT record
  4. AS???    100.122.18.229 (100.122.18.229)  70.0%    10   37.6  38.1  37.6  38.4   0.4
@Not a TXT record
  5. AS???    100.122.18.108 (100.122.18.108)   0.0%    10   35.5  62.0  35.2 281.4  77.3
  6. AS15169  72.14.197.29 (72.14.197.29)       0.0%    10   40.8  50.5  36.2 173.7  43.3
  7. AS15169  108.170.232.21 (108.170.232.21)  10.0%    10   40.2  37.2  36.2  40.2   1.2
  8. AS15169  108.170.228.5 (108.170.228.5)     0.0%    10   40.3  40.6  39.9  41.4   0.4
  9. AS15169  dns.google (8.8.8.8)              0.0%    10   43.7  44.0  42.8  49.7   2.0
1 Like

Although intermediate sites responded erratically, the final destination was consistent with stddev of 2ms.

This suggests there is no problem either at your uplink or anywhere along that particular path. How about running a speed test and running mtr continuously during the test? See how the results differ.

1 Like

I ran mtr continuously while doing some speed tests at dslreports:

Host                             @Not a TXT record                                                                                                  Loss%   Snt   Last   Avg  Best  Wrst StDev
 1. (waiting for reply)                            @Not a TXT record
 2. AS???    100.122.50.185 (100.122.50.185)                        @Not a TXT record                                                                 0.0%   116   53.4  27.0  17.9  64.0  11.8
 3. AS???    100.122.22.200 (100.122.22.200)                                         @Not a TXT record                                                0.9%   116   30.8  48.8  30.2 632.6  76.1
 4. AS???    100.122.18.229 (100.122.18.229)                                                          @Not a TXT record                              59.1%   116   74.4  45.8  37.5  75.1  11.6
 5. AS???    100.122.18.108 (100.122.18.108)                                                                           @Not a TXT record              0.9%   116  171.1 106.8  36.3 966.1 115.8
 6. AS15169  72.14.197.29 (72.14.197.29)                                                                                                              0.0%   116   36.2  52.2  35.6 600.8  66.8
 7. AS15169  108.170.232.21 (108.170.232.21)                                                                                                          0.0%   115   64.4  49.1  35.7 591.6  52.5
 8. AS15169  108.170.228.5 (108.170.228.5)                                                                                                            0.0%   115   40.9  44.5  36.2  98.6  12.6
 9. AS15169  dns.google (8.8.8.8)                                                                                                                     0.0%   115   70.6  64.9  40.2 640.9  92.4
1 Like

Yep, you've got problems. What device are you running on?

TP-LINK Archer C60 v2 is my router and a Zyxel AMG 1302 modem in bridge mode.

Can you post a link to the dslreports test results from your run?

Speeds are a bit down because people are using the internet.

1 Like

well, sure, but I'd say you probably need to tune the speeds more carefully as your first step. Basically to do that you need a wired client, as the only thing on your network. When you get a break and no-one else needs the network for about 2 mins, connect via a cable, turn off the WiFi temporarily, and run a dslreports test and post the results, you can then return to normal wifi + activity.

1 Like

I think it indicates a somewhat intermittent problem.

Mmmh, looking at the details of that speedtest, I see that it measures against servers in the US, which might be a tad far from Brazil and also potentially not well-connected to your ISP's network...
It seems that dslreports is not an ideal measurement system for you (maybe you could retry sometime in the local low-traffic hours between 0:00 and ~5:00 o'clock?)

or try a different measurement platform. Netflix's fast.com is not terrible and can be configured to test upload and download "loaded" ping (and you can always run a concurrent mtr test in a terminal during a test and monitor the RTT development there "by eye").

Did some tests at 2:00 am local time
SQM off, wired connection, wifi off > http://www.dslreports.com/speedtest/61424304
SQM on, wired, wifi off > http://www.dslreports.com/speedtest/61424351
SQM on, wired, wifi on, with local servers > http://www.dslreports.com/speedtest/61424712

fast.com speed test with 8 to 16 streams, minimum 60 s duration
dl: 5.3 Mbps, up: 290 kbps
unloaded 53 ms
loaded 76 ms

and the mtr log after some tests:

Host                             @Not a TXT record                                                         Loss%   Snt   Last   Avg  Best  Wrst StDev
                                                   @Not a TXT record
                  1. (waiting for reply)                            @Not a TXT record                        0.0%    86   22.4  26.4  21.9  72.5   8.8
 3. AS???    100.122.19.129 (100.122.19.129)                                         @Not a TXT record       1.2%    86   40.3  44.4  39.1  89.4   8.8
 4. AS???    100.122.17.178 (100.122.17.178)                                                          @Not a TX5 recor6   47.5  58.7  46.5 646.1  65.7
 5. AS???    100.122.25.87 (100.122.25.87)                                                                   0.0%    86@Not a1  61.1cord.8 633.2  71.5
 6. AS15169  72.14.218.158 (72.14.218.158)                                                                   0.0%    85   37.5  53.1  36.0 623.1  75.8
 7. AS15169  108.170.251.65 (108.170.251.65)                                                                 1.2%    85   37.1  42.5  37.1  81.4   9.3
 8. AS15169  108.170.228.5 (108.170.228.5)                                                                   0.0%    85   41.9  45.8  41.1 107.1  10.3
 9. AS15169  dns.google (8.8.8.8)                                                                            0.0%    85   37.7  48.2  36.8 632.8  64.7

The test with local servers suggests that SQM is working OK for you. It's very possible that, with distant servers, various high-traffic interchanges are experiencing congestion, something you can't control.

The problem is that even with local servers the bufferbloat meter spikes to 500+ ms for 1 second and then comes back, and somehow it doesn't affect the result much.

In any game I try to play (on local servers) I can't move properly, because every 2 seconds my ping goes up to 600 ms and then comes back down, every time.

Where are the servers you're playing off of? Local to you? Or remote?

If everything you're trying to access is on the "other side" of some national choke point, then the fact that the choke point is choked is unavoidable. With all the video conferencing and so forth going on due to COVID, it would be unsurprising to hear that the entire country of Brazil has several highly congested overseas links and nothing can be done about it.

When you did your test to "local servers", no such high spikes were recorded in your dslreports test. This suggests that the issue may be international undersea fiber cables etc., which you simply have no control over. There may be other explanations too... but this is a reasonable first guess.

1 Like

They are here in Brazil, in São Paulo; I'm from another state close by.

I can see the spikes in the meter, but they are brief; they also appear in the mtr test.
I will record a gif to send.

testnet

It is very possible, but the problem is that all my friends' internet connections are stable and I didn't see any complaints in my ISP's forum discussions.

Was that test on wifi? The situation where it shifts between different levels is typical of changing wifi modulation. Sudden ping spikes on wifi are often caused by background processes doing scans or whatever. There's a lot more going on when you're on wifi. If you're not on wifi, then those variations in bandwidth suggest something really odd going on. Do you have a mobile phone network based WAN?

Yes, but I just tested with a wired connection and it's the same: the bufferbloat spikes just like in the gif, with variations in bandwidth.

Like a mobile plan?

I meant what technology does your ISP use? Is it fiber, DSL, cable, wireless mobile, what?