DNS funny business

I had been happily using Quad9 for some time until earlier this month, when web pages started loading slowly or incompletely, or failing altogether (the browser reports "cannot find server"). Issues with any given web site are intermittent and inconsistent, but overall it happens frequently and is quite annoying. Speedtests are normal.

If I change to Cloudflare, OpenDNS, or Google DNS, the issue goes away. Switch back to Quad9 and the issue is immediately apparent.

I do not think I can point a finger at either Quad9 or OpenWrt (router was unchanged), but I am grasping at straws so I hope folks can suggest some trees I could bark up. I think I have a fairly vanilla configuration.

# ubus call system board
{
	"kernel": "5.15.167",
	"hostname": "mt61",
	"system": "ARMv8 Processor rev 4",
	"model": "GL.iNet GL-MT6000",
	"board_name": "glinet,gl-mt6000",
	"rootfs_type": "squashfs",
	"release": {
		"distribution": "OpenWrt",
		"version": "23.05.5",
		"revision": "r24106-10cc5fcd00",
		"target": "mediatek/filogic",
		"description": "OpenWrt 23.05.5 r24106-10cc5fcd00"
	}
}
# cat /etc/config/network
config interface 'loopback'
    option device 'lo'
    option proto 'static'
    option ipaddr '127.0.0.1'
    option netmask '255.0.0.0'

config globals 'globals'
    option ula_prefix <redacted>

config device
    option name 'br-lan'
    option type 'bridge'
    list ports 'lan1'
    list ports 'lan2'
    list ports 'lan3'
    list ports 'lan4'
    list ports 'lan5'

config device
    option name 'lan1'
    option macaddr <redacted>

config device
    option name 'lan2'
    option macaddr <redacted>

config device
    option name 'lan3'
    option macaddr <redacted>

config device
    option name 'lan4'
    option macaddr <redacted>

config device
    option name 'lan5'
    option macaddr <redacted>

config interface 'lan'
    option device 'br-lan'
    option proto 'static'
    option ipaddr '192.168.1.1'
    option netmask '255.255.255.0'
    option ip6assign '64'
    option ip6hint '1'
    list dns '9.9.9.9'
    list dns '149.112.112.112'
    list dns '2620:fe::fe'
    list dns '2620:fe::9'

config device
    option name 'eth1'
    option macaddr <redacted>

config interface 'wan'
    option device 'eth1'
    option proto 'dhcp'
    option peerdns '0'

config interface 'wan6'
    option device 'eth1'
    option proto 'dhcpv6'
    option peerdns '0'
    option reqaddress 'try'
    option reqprefix '60'

config device 'guest_dev'
    option type 'bridge'
    option name 'br-guest'

config interface 'guest'
    option proto 'static'
    option device 'br-guest'
    option ipaddr '192.168.2.1'
    option netmask '255.255.255.0'
    option ip6assign '64'
    option ip6hint '2'
    list dns '9.9.9.9'
    list dns '149.112.112.112'
    list dns '2620:fe::fe'
    list dns '2620:fe::9'
# cat /etc/config/dhcp
config dnsmasq
    option domainneeded '1'
    option localise_queries '1'
    option rebind_protection '1'
    option rebind_localhost '1'
    option local '/lan/'
    option domain 'lan'
    option expandhosts '1'
    option cachesize '1000'
    option authoritative '1'
    option readethers '1'
    option leasefile '/tmp/dhcp.leases'
    option resolvfile '/tmp/resolv.conf.d/resolv.conf.auto'
    option localservice '1'
    option ednspacket_max '1232'

config dhcp 'lan'
    option interface 'lan'
    option start '100'
    option limit '150'
    option leasetime '12h'
    option dhcpv4 'server'
    option ra 'server'
    option ra_flags 'none'

config dhcp 'wan'
    option interface 'wan'
    option ignore '1'

config odhcpd 'odhcpd'
    option maindhcp '0'
    option leasefile '/tmp/hosts/odhcpd'
    option leasetrigger '/usr/sbin/odhcpd-update'
    option loglevel '4'

config dhcp 'guest'
    option interface 'guest'
    option start '100'
    option limit '150'
    option leasetime '4h'
    option dhcpv4 'server'
    option ra 'server'
    option ra_flags 'none'

config host
    option name 'printer'
    option ip '192.168.1.99'
    option mac <redacted>

config host
    list mac <redacted>
    option ip '192.168.1.98'
# cat /etc/config/wireless
config wifi-device 'radio0'
    option type 'mac80211'
    option path 'platform/soc/18000000.wifi'
    option channel '1'
    option band '2g'
    option htmode 'HE20'
    option country 'US'
    option cell_density '0'

config wifi-iface 'default_radio0'
    option device 'radio0'
    option network 'lan'
    option mode 'ap'
    option ssid <redacted>
    option encryption 'psk2+ccmp'
    option key <redacted>

config wifi-device 'radio1'
    option type 'mac80211'
    option path 'platform/soc/18000000.wifi+1'
    option channel 'auto'
    option band '5g'
    option htmode 'HE80'
    option country 'US'

config wifi-iface 'default_radio1'
    option device 'radio1'
    option network 'lan'
    option mode 'ap'
    option ssid <redacted>
    option encryption 'psk2+ccmp'
    option key <redacted>

config wifi-iface 'guest2'
    option device 'radio0'
    option mode 'ap'
    option network 'guest'
    option ssid <redacted>
    option encryption 'psk2+ccmp'
    option key <redacted>
    option disabled '1'

config wifi-iface 'guest5'
    option device 'radio1'
    option mode 'ap'
    option network 'guest'
    option ssid <redacted>
    option encryption 'psk2+ccmp'
    option key <redacted>
    option disabled '1'
# cat /etc/config/firewall
config defaults
    option syn_flood '1'
    option input 'DROP'
    option output 'ACCEPT'
    option forward 'DROP'
    option synflood_protect '1'
    option drop_invalid '1'

config zone
    option name 'lan'
    list network 'lan'
    option input 'ACCEPT'
    option output 'ACCEPT'
    option forward 'ACCEPT'

config zone
    option name 'wan'
    list network 'wan'
    list network 'wan6'
    option input 'DROP'
    option output 'ACCEPT'
    option forward 'DROP'
    option masq '1'
    option mtu_fix '1'

config forwarding
    option src 'lan'
    option dest 'wan'

config rule
    option name 'Allow-DHCP-Renew'
    option src 'wan'
    option proto 'udp'
    option dest_port '68'
    option target 'ACCEPT'
    option family 'ipv4'

config rule
    option name 'Allow-Ping'
    option src 'wan'
    option proto 'icmp'
    option icmp_type 'echo-request'
    option family 'ipv4'
    option target 'DROP'

config rule
    option name 'Allow-IGMP'
    option src 'wan'
    option proto 'igmp'
    option family 'ipv4'
    option target 'ACCEPT'

config rule
    option name 'Allow-DHCPv6'
    option src 'wan'
    option proto 'udp'
    option dest_port '546'
    option family 'ipv6'
    option target 'ACCEPT'

config rule
    option name 'Allow-MLD'
    option src 'wan'
    option proto 'icmp'
    option src_ip 'fe80::/10'
    list icmp_type '130/0'
    list icmp_type '131/0'
    list icmp_type '132/0'
    list icmp_type '143/0'
    option family 'ipv6'
    option target 'ACCEPT'

config rule
    option name 'Allow-ICMPv6-Input'
    option src 'wan'
    option proto 'icmp'
    list icmp_type 'echo-request'
    list icmp_type 'echo-reply'
    list icmp_type 'destination-unreachable'
    list icmp_type 'packet-too-big'
    list icmp_type 'time-exceeded'
    list icmp_type 'bad-header'
    list icmp_type 'unknown-header-type'
    list icmp_type 'router-solicitation'
    list icmp_type 'neighbour-solicitation'
    list icmp_type 'router-advertisement'
    list icmp_type 'neighbour-advertisement'
    option limit '1000/sec'
    option family 'ipv6'
    option target 'ACCEPT'

config rule
    option name 'Allow-ICMPv6-Forward'
    option src 'wan'
    option dest '*'
    option proto 'icmp'
    list icmp_type 'echo-request'
    list icmp_type 'echo-reply'
    list icmp_type 'destination-unreachable'
    list icmp_type 'packet-too-big'
    list icmp_type 'time-exceeded'
    list icmp_type 'bad-header'
    list icmp_type 'unknown-header-type'
    option limit '1000/sec'
    option family 'ipv6'
    option target 'ACCEPT'

config rule
    option name 'Allow-IPSec-ESP'
    option src 'wan'
    option dest 'lan'
    option proto 'esp'
    option target 'ACCEPT'

config rule
    option name 'Allow-ISAKMP'
    option src 'wan'
    option dest 'lan'
    option dest_port '500'
    option proto 'udp'
    option target 'ACCEPT'

config zone 'guest'
    option name 'guest'
    option network 'guest'
    option input 'DROP'
    option output 'ACCEPT'
    option forward 'DROP'

config forwarding 'guest_wan'
    option src 'guest'
    option dest 'wan'

config rule 'guest_dns'
    option name 'Allow-DNS-Guest'
    option src 'guest'
    option dest_port '53'
    option proto 'tcp udp'
    option target 'ACCEPT'

config rule 'guest_dhcp'
    option name 'Allow-DHCP-Guest'
    option src 'guest'
    option dest_port '67'
    option proto 'udp'
    option family 'ipv4'
    option target 'ACCEPT'

config rule 'guest_icmpv6'
    option name 'Allow-ICMPv6-Input-Guest'
    option family 'ipv6'
    list proto 'icmp'
    option src 'guest'
    option target 'ACCEPT'
    option limit '10/second'

config rule 'guest_dhcpv6'
    option name 'Allow-DHCPv6-Guest'
    option family 'ipv6'
    option proto 'udp'
    option src 'guest'
    option src_ip 'fc00::/6'
    option dest_ip 'fc00::/6'
    option dest_port '547'
    option target 'ACCEPT'
1 Like

Do you definitely have a public IPv6 address on your WAN connection? If not, or it's unreliable, then the DNS config shouldn't use IPv6 addresses.

Yes, and for quite a long time now. I did also try just using the Quad9 IPv4 addresses, and the issue persisted. Thanks!
PS: Also, Cloudflare and Google DNS work fine with IPv6 DNS servers.

Hrm...

I think you should probably reconfigure things so the dnsmasq instance is doing all your resolving.

Set dnsmasq to forward queries to the Quad9 IP addresses, set the custom DNS for each network to be the OpenWrt IP address on each network, and set the DHCP server for each network to hand out the router's IP to clients as the DNS server.

That way your dnsmasq instance will cache results and a single rogue/broken/busy device or piece of code on your network is less likely to cause you to hit any of Quad9's rate limits - although I think you'd have to be way into the thousands of QPS to get to that!

Thanks. I am afraid some of that went over my head. I may not understand where the resolving is happening now, or how I would change it. My clients do have the router's IP as their DNS server; I assumed that it was caching DNS results.

For starters, set up the upstream DNS servers the right way - on the wan interface(s) or use DNS forwarding.

After some time check the number of retried or failed queries:

PID=`pidof dnsmasq` && kill -USR1 $PID; logread | grep dnsmasq | grep server

For troubleshooting, you can also enable DNS query logging.

Thank you, I will give it a try. I was not aware of the "right way." I had asked about this in the past, but I must have come away with the wrong understanding, or with the impression that it didn't matter.

Should I configure just IPv4 servers on the wan interface, and just IPv6 servers on wan6, or should both interfaces have both?

Logging the dnsmasq info with SIGUSR1 is interesting. After configuring the DNS servers on wan and wan6, the issue remains, and I see quite a few retries:

daemon.info dnsmasq[1]:
server 9.9.9.9#53: queries sent 205, retried 73, failed 0, nxdomain replies 3, avg. latency 18ms
server 149.112.112.112#53: queries sent 582, retried 264, failed 0, nxdomain replies 9, avg. latency 80ms
server 2620:fe::fe#53: queries sent 231, retried 0, failed 1, nxdomain replies 0, avg. latency 100ms
server 2620:fe::9#53: queries sent 70, retried 7, failed 2, nxdomain replies 0, avg. latency 22ms
# cat /etc/config/network
config interface 'loopback'
    option device 'lo'
    option proto 'static'
    option ipaddr '127.0.0.1'
    option netmask '255.0.0.0'

config globals 'globals'
    option ula_prefix <redacted>

config device
    option name 'br-lan'
    option type 'bridge'
    list ports 'lan1'
    list ports 'lan2'
    list ports 'lan3'
    list ports 'lan4'
    list ports 'lan5'

config device
    option name 'lan1'
    option macaddr <redacted>

config device
    option name 'lan2'
    option macaddr <redacted>

config device
    option name 'lan3'
    option macaddr <redacted>

config device
    option name 'lan4'
    option macaddr <redacted>

config device
    option name 'lan5'
    option macaddr <redacted>

config interface 'lan'
    option device 'br-lan'
    option proto 'static'
    option ipaddr '192.168.1.1'
    option netmask '255.255.255.0'
    option ip6assign '64'
    option ip6hint '1'

config device
    option name 'eth1'
    option macaddr <redacted>

config interface 'wan'
    option device 'eth1'
    option proto 'dhcp'
    option peerdns '0'
    list dns '9.9.9.9'
    list dns '149.112.112.112'
    list dns '2620:fe::fe'
    list dns '2620:fe::9'

config interface 'wan6'
    option device 'eth1'
    option proto 'dhcpv6'
    option peerdns '0'
    option reqaddress 'try'
    option reqprefix '60'
    list dns '2620:fe::fe'
    list dns '2620:fe::9'
    list dns '9.9.9.9'
    list dns '149.112.112.112'

config device 'guest_dev'
    option type 'bridge'
    option name 'br-guest'

config interface 'guest'
    option proto 'static'
    option device 'br-guest'
    option ipaddr '192.168.2.1'
    option netmask '255.255.255.0'
    option ip6assign '64'
    option ip6hint '2'

I hope I have the config issues sorted now (see the config at the bottom). However, the issues persist with Quad9, but not with OpenDNS.

With Quad9, dnsmasq shows many retries and some failures. With OpenDNS, it shows no retries but some failures. I am not sure how much would be considered normal. But when using a browser, issues are not apparent with OpenDNS.

I wonder if I should talk with my ISP.

Thanks for your attention.

daemon.info dnsmasq[1]:
time 1734967591
cache size 1000, 0/1619 cache insertions re-used unexpired cache entries.
queries forwarded 928, queries answered locally 624
pool memory in use 2688, max 3600, allocated 4800
child processes for TCP requests: in use 0, highest since last SIGUSR1 0, max allowed 20.
server 9.9.9.9#53: queries sent 709, retried 262, failed 19, nxdomain replies 12, avg. latency 24ms
server 149.112.112.112#53: queries sent 152, retried 33, failed 13, nxdomain replies 0, avg. latency 18ms
server 2620:fe::fe#53: queries sent 155, retried 0, failed 6, nxdomain replies 1, avg. latency 22ms
server 2620:fe::9#53: queries sent 182, retried 0, failed 7, nxdomain replies 12, avg. latency 26ms
daemon.info dnsmasq[1]:
time 1734966335
cache size 1000, 0/2796 cache insertions re-used unexpired cache entries.
queries forwarded 2004, queries answered locally 693
pool memory in use 7728, max 8736, allocated 9600
child processes for TCP requests: in use 0, highest since last SIGUSR1 0, max allowed 20.
server 208.67.222.222#53: queries sent 989, retried 0, failed 193, nxdomain replies 0, avg. latency 49ms
server 208.67.220.220#53: queries sent 1650, retried 0, failed 86, nxdomain replies 7, avg. latency 34ms
server 2620:119:35::35#53: queries sent 789, retried 0, failed 88, nxdomain replies 0, avg. latency 74ms
server 2620:119:53::53#53: queries sent 790, retried 0, failed 142, nxdomain replies 5, avg. latency 39ms

I also notice occasional timeouts with Quad9:

$ time dig @9.9.9.9 google.com
;; communications error to 9.9.9.9#53: timed out

; <<>> DiG 9.18.28-0ubuntu0.24.04.1-Ubuntu <<>> @9.9.9.9 google.com
; (1 server found)
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 1285
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 1232
;; QUESTION SECTION:
;google.com.            IN  A

;; ANSWER SECTION:
google.com.     217 IN  A   172.217.4.46

;; Query time: 17 msec
;; SERVER: 9.9.9.9#53(9.9.9.9) (UDP)
;; WHEN: Mon Dec 23 09:46:19 EST 2024
;; MSG SIZE  rcvd: 55

real    0m5.044s
user    0m0.003s
sys 0m0.009s
$
# cat /etc/config/network

config interface 'loopback'
    option device 'lo'
    option proto 'static'
    option ipaddr '127.0.0.1'
    option netmask '255.0.0.0'

config globals 'globals'
    option ula_prefix '<redacted>'

config device
    option name 'br-lan'
    option type 'bridge'
    list ports 'lan1'
    list ports 'lan2'
    list ports 'lan3'
    list ports 'lan4'
    list ports 'lan5'

config device
    option name 'lan1'
    option macaddr '<redacted>'

config device
    option name 'lan2'
    option macaddr '<redacted>'

config device
    option name 'lan3'
    option macaddr '<redacted>'

config device
    option name 'lan4'
    option macaddr '<redacted>'

config device
    option name 'lan5'
    option macaddr '<redacted>'

config interface 'lan'
    option device 'br-lan'
    option proto 'static'
    option ipaddr '192.168.1.1'
    option netmask '255.255.255.0'
    option ip6assign '64'
    option ip6hint '1'

config device
    option name 'eth1'
    option macaddr '<redacted>'

config interface 'wan'
    option device 'eth1'
    option proto 'dhcp'
    option peerdns '0'
    list dns '9.9.9.9'
    list dns '149.112.112.112'

config interface 'wan6'
    option device 'eth1'
    option proto 'dhcpv6'
    option peerdns '0'
    option reqaddress 'try'
    option reqprefix '60'
    list dns '2620:fe::fe'
    list dns '2620:fe::9'

config device 'guest_dev'
    option type 'bridge'
    option name 'br-guest'

config interface 'guest'
    option proto 'static'
    option device 'br-guest'
    option ipaddr '192.168.2.1'
    option netmask '255.255.255.0'
    option ip6assign '64'
    option ip6hint '2'