- User reboots the device, for whatever reason.
- Instead of actually rebooting, the device gets stuck in DDR calibration due to too low a core voltage (0.9V instead of 1.0V or 1.1V).
- User doesn't notice, or decides to wait and see what happens, and lets the device sit in this state for a while.
- After a longer amount of time, the user tries to power cycle, then decides to wire up a serial console, and it shows what we are seeing now.
My hypothesis is that keeping the device at the 3rd step above for a longer time might break the RAM.
This sequence seems to almost exactly match what happened to one of my new Belkin RT3200 routers. I got 3 of them in November 2023. A few days after getting each one set up with OpenWrt, I was working on getting a site-to-site VPN set up on the main gateway router. After installing some packages, I rebooted from the web interface. I waited about 20-30 min, but the router never came back up. I yanked the power cord, but after plugging it back in, there was no LED activity at all. After troubleshooting for a few days and trying TFTP and other recovery methods with no success, I opened the case up and wired a Raspberry Pi UART to the serial pins. This output printed in a loop:
F0: 102B 0000
F6: 0000 0000
V0: 0000 0000 [0001]
00: 0000 0000
BP: 0400 0041 [0000]
G0: 1190 0000
T0: 0000 02D5 [000F]
Jump to BL
NOTICE: BL2: v2.9(release):OpenWrt v2023-07-24-00ac6db3-2 (mt7622-snand-1ddr)
NOTICE: BL2: Built : 21:45:35, Oct 9 2023
NOTICE: CPU: MT7622
NOTICE: WDT: Cold boot
NOTICE: WDT: disabled
NOTICE: SPI-NAND: FM35Q1GA (128MB)
ERROR: BL2: Failed to load image id 5 (-2)
Searching for this error, I stumbled upon this post, and subsequently ordered the pogo pin connector (double row, 1.27mm, 10 pins) to wire up JTAG and work on recovering the bootloader using instructions in that post.
After much trial and error with the wiring and configuration, I was able to use OpenOCD and telnet to open a debug session. However, I have been unsuccessful in getting U-Boot running. I used daniel's procedures (thank you!).
After running `mt7622_reset` (via telnet):
Failed to read ESR_EL3 register
After running `mt7622_ddrinit`:
Failed to write memory at 0x80810080
Here's the full OpenOCD config I am using for Raspberry Pi 4 as the OpenOCD host.
# Belkin RT3200
# Adapter section adapted from raspberrypi2-native.cfg (OpenOCD interface on
# RPi v2+), adjusted for a Raspberry Pi 4 host.
# Use RPi GPIO pins
adapter driver bcm2835gpio
# Base address of the I/O peripherals as seen by the ARM cores.
# The Raspberry Pi 4 (BCM2711) maps them at 0xFE000000; the value
# 0x3F000000 from raspberrypi2-native.cfg is only valid for the RPi 2/3.
bcm2835gpio_peripheral_base 0xFE000000
# Clock scaling (transition delay = SPEED_COEFF/khz - SPEED_OFFSET).
# NOTE(review): these coefficients are the RPi 2 values; they only affect the
# accuracy of the JTAG clock rate and may need recalibrating for the RPi 4.
bcm2835gpio_speed_coeffs 146203 36
# JTAG tck tms tdi tdo (GPIO numbers)
# Header pin numbers: 23 22 19 21
bcm2835gpio_jtag_nums 11 25 10 9
# If you define trst or srst, use appropriate reset_config
# Header pin numbers: TRST - 26, SRST - 18
bcm2835gpio_trst_num 7
# reset_config trst_only
bcm2835gpio_srst_num 24
# reset_config srst_only srst_push_pull
# or if you have both connected,
reset_config trst_and_srst srst_push_pull
## end raspberrypi2-native.cfg section
transport select jtag
adapter speed 1000
#ftdi_tdo_sample_edge falling
adapter srst delay 750
jtag_ntrst_assert_width 100
jtag_ntrst_delay 50
#reset_config trst_and_srst separate srst_gates_jtag srst_open_drain
# Chip name and DAP TAP ID can be preset by the caller through CHIPNAME and
# DAP_TAPID; otherwise the MT7622 defaults below are used.
if { ![info exists CHIPNAME] } {
    set _CHIPNAME mt7622
} else {
    set _CHIPNAME $CHIPNAME
}
if { ![info exists DAP_TAPID] } {
    set _DAP_TAPID 0x4ba00477
} else {
    set _DAP_TAPID $DAP_TAPID
}

# One JTAG TAP carrying the debug access port, plus a memory-access target
# on AP 0.
jtag newtap $_CHIPNAME cpu -irlen 4 -ircapture 0x1 -irmask 0x0f \
    -expected-id $_DAP_TAPID
dap create $_CHIPNAME.dap -chain-position $_CHIPNAME.cpu
target create $_CHIPNAME.ahb mem_ap -dap $_CHIPNAME.dap -ap-num 0 \
    -dbgbase 0x80070000

# Declare the 2 main application cores: per-core debug and CTI base
# addresses, indexed by core number.
set _TARGETNAME $_CHIPNAME.core
set _smp_command ""
set _cores 2
array set $_TARGETNAME.base {0 0x80810000 1 0x80910000}
array set $_TARGETNAME.cti {0 0x80820000 1 0x80920000}
# Turn the MMU off: read the register at CP15 c1,c0,0 (the system control
# register), clear bit 0 and write the result back.
proc mmu_off {} {
    set sctlr [aarch64 mrc 15 0 1 0 0]
    aarch64 mcr 15 0 1 0 0 [expr {$sctlr & ~1}]
}
# Turn the MMU on: read the register at CP15 c1,c0,0 (the system control
# register), set bit 0 and write the result back.
proc mmu_on {} {
    set cp [aarch64 mrc 15 0 1 0 0]
    set cp [expr {$cp | 1}]
    # The write-back must use the "aarch64" command; the previous spelling
    # "aatch64" is not a command, so the proc aborted before writing.
    aarch64 mcr 15 0 1 0 0 $cp
}
# Halt the core, disable the watchdog and MMU, then load and start the
# mode-switch helper images.
proc mt7622_reset {} {
    # Halt the target and wait until it has actually stopped.
    poll
    sleep 2
    halt
    wait_halt

    # Disable the watchdog.
    set wdt_reg 0x10212000
    mww $wdt_reg 0x22000000

    mmu_off
    # NOTE(review): 8096 may be a typo for 8192 - confirm before changing.
    mt7622.core0 configure -work-area-phys 0x101000 -work-area-size 8096

    # Switch to AArch64 mode: set CPSR, place the helper images, then start
    # executing at the first one.
    reg cpsr 0x1d3
    foreach {image load_addr} {
        /usr/src/mtk-openocd-scripts/mt7622/switch_mode_32_64.bin 0x100000
        /usr/src/mtk-openocd-scripts/mt7622/aarch64_stall.bin 0x100100
    } {
        load_image $image $load_addr bin
    }
    reg pc 0x100000
    resume
}
# Load the BL2 image that initializes DDR and start executing it.
proc mt7622_ddrinit {} {
    # Image that initializes DDR for a 1-chip board.
    set bl2_image /usr/src/mtk-openocd-scripts/mt7622/bl2-1c.bin
    # For a 2-chip board, use this image instead:
    #set bl2_image /usr/src/mtk-openocd-scripts/mt7622/bl2-2c.bin

    # The image is loaded at, and entered from, the same address.
    set entry 0x201000
    load_image $bl2_image $entry bin
    reg pc $entry
    resume
}
# Load the FIP image (U-Boot and ATF) into memory and let the core run.
proc mt7622_uboot {} {
    set fip_image /usr/src/mtk-openocd-scripts/mt7622/fip-snand-no-bmt.bin
    load_image $fip_image 0x40020000 bin

    # NOTE(review): presumably this flag write releases the code that is
    # waiting after the earlier steps - confirm against the helper sources.
    mww 0x100200 1
    resume
}
# Create one CTI and one aarch64 target per core, group the cores into an
# SMP set, and make core 0 the current target.
for { set _core 0 } { $_core < $_cores } { incr _core } {
    cti create cti$_core -dap $_CHIPNAME.dap \
        -baseaddr [set $_TARGETNAME.cti($_core)] -ap-num 1

    # Build the "target create" command as a string so that per-core options
    # can be appended before it is evaluated.
    set _command "target create ${_TARGETNAME}$_core aarch64"
    append _command " -dap $_CHIPNAME.dap -coreid $_core -cti cti$_core"
    append _command " -dbgbase [set $_TARGETNAME.base($_core)]"

    if { $_core == 0 } {
        # NOTE(review): the original comment described this option as
        # something to "uncomment to use hardware threads pseudo rtos", yet
        # the line is active and selects "-rtos linux" - confirm intent.
        append _command " -rtos linux"
        append _command " -work-area-size 0x40000 -work-area-phys 0xfff80000"
        append _command " -work-area-backup 0"
        # Core 0 starts the SMP group; later cores are appended to it.
        set _smp_command "target smp ${_TARGETNAME}$_core"
    } else {
        append _smp_command " ${_TARGETNAME}$_core"
        append _command " -defer-examine"
    }
    eval $_command
}
eval $_smp_command
targets ${_TARGETNAME}0
Also sharing the wiring diagram I made for connecting the JTAG pads to the Raspberry Pi using that particular pogo connector, in case it helps others.
My questions are: do the errors I am getting when trying to load U-Boot seem to indicate the RAM has failed, or a problem with the wiring or configuration? If the RAM failed, is it possible to replace the RAM, or would it even be worth it? Will this issue potentially manifest in my other 2 routers if I reboot them? Should I only use the hardware rocker switch to shutdown/reboot going forward to be safe, or is the same risk present?