May 24, 2023

Grey Cat the Flag CTF 2023 - Crackme2

Grey Cat The Flag CTF 2023 had some nice reversing challenges, this one was very nice and i think deserves a small writeup.

You can grab the original file here. A fixed version was later released fixed, but i used the original binary.

1. Quick Recon

it’s a 64 bits ELF that takes the flag as input

% file crackme2
crackme2: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=ef9a4ed8207e6b5b4ff62a6c5e140a5059d6d067, for GNU/Linux 3.2.0, stripped

% ./crackme2
Usage: ./crackme2 [greyctf{...}]

also we can notice there’s some antidebug as it’s attempting to ptrace itself, so we’ll probably have to deal with it

% strace -e trace=ptrace ./crackme2 bla
ptrace(PTRACE_TRACEME)                  = -1 EPERM (Operation not permitted)
--- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=NULL} ---
+++ killed by SIGSEGV +++
zsh: segmentation fault  strace -e trace=ptrace ./crackme2 bla

the segfault is due to the original binary being slightly broken but is not an issue.

main is nothing fancy but we can notice the input flag is copied to the stack:

undefined8 main(int argc,char **argv)
{
    uint uVar1;
    size_t len;
    long in_FS_OFFSET;
    char flag [136];
    long local_10;

    if (argc != 2) {
        printf("Usage: %s [greyctf{...}]\n",*argv);
                    /* WARNING: Subroutine does not return */
        exit(0);
    }
    len = strlen(argv[1]);
    if (0x7f < len) {
        puts("The flag isn\'t so long. You will overflow the program...");
    }
                    /* copy input flag on the stack */
    strcpy(flag,argv[1]);
                    /* meat must be here */
    uVar1 = FUN_004014c3(0);
    printf("%d\n",(ulong)uVar1);
    return 0;
}

everything must happen during the call to FUN_004014c3 however it’s a very long chain of function calls (lot of FUN…. :p):

function calls

so we can work backward instead: find an interesting string like "Wrong flag :(" and walk the XREF up to 00401350:

void execute_mess(void)
{
    int iVar1;
    void *__addr;

    __addr = (void *)syscall_mmap(0,0x1000,7,0x22,0xffffffff,0);
    if (__addr == (void *)0xffffffffffffffff) {
        wrong_flag();
    }
    dyn_call(__addr,0x38c);
    iVar1 = munmap(__addr,0x1000);
    if (iVar1 == -1) {
        wrong_flag();
    }
    return;
}

2. Understanding what’s happening

it starts by allocating a RWX region using mmap. It’s not using the library function, but the syscall instead:

                             syscall_mmap
        00401232 f3 0f 1e fa                  ENDBR64
        00401236 55                           PUSH                    RBP
        00401237 48 89 e5                     MOV                     RBP,RSP
        0040123a 48 89 7d e8                  MOV                     qword ptr [RBP + local_20],RDI
        0040123e 48 89 75 e0                  MOV                     qword ptr [RBP + local_28],RSI
        00401242 89 55 dc                     MOV                     dword ptr [RBP + local_2c],EDX
        00401245 89 4d d8                     MOV                     dword ptr [RBP + local_30],ECX
        00401248 44 89 45 d4                  MOV                     dword ptr [RBP + local_34],R8D
        0040124c 4c 89 4d c8                  MOV                     qword ptr [RBP + local_40],R9
        00401250 49 89 ca                     MOV                     R10,RCX
                                       ; 0x9 is syscall for mmap (https://filippo.io/linux-syscall-table/)
        00401253 48 c7 c0 09 00 00 00         MOV                     RAX,0x9
        0040125a 0f 05                        SYSCALL
        0040125c 48 89 c1                     MOV                     RCX,RAX
        0040125f 48 89 4d f8                  MOV                     qword ptr [RBP + local_10],RCX
        00401263 48 8b 45 f8                  MOV                     RAX,qword ptr [RBP + local_10]
        00401267 5d                           POP                     RBP
        00401268 c3                           RET

then it loops over and over decrypting and executing some code:

void dyn_call(code *mmap_mem,undefined4 idx)
{
    undefined4 index;
    undefined4 key;

    key = 0xad23773b;
    index = idx;
    do {
        decrypt_code(mmap_mem,key,index);
        /* actually returns both index and key for next round */
        index = (*mmap_mem)();
    } while( true );
}

void decrypt_code(void *table,uint key,int offset)

{
    ulong i;
    uint size;
    
    size = ENCRYPTED_CODE[offset];
    for (i = 0; i < size; i = i + 1) {
        *(dword *)((long)table + i * 4) = ENCRYPTED_CODE[i + (long)(offset + 1)] ^ key;
    }
    return;
}

decrypt_code takes a XOR key and offset into an array we call ENCRYPTED_CODE and decrypt+copy into the mmap’ed region.

We can use a small GDB script to trace calls to decrypt_code and dump xor keys and offsets:

import gdb

def read_reg(reg):
    return gdb.parse_and_eval("${}".format(reg))


fd = open("trace_crack2.txt", "w")

gdb.execute('break *0x40133b')  # call decrypt_code
gdb.execute('run aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')

while 1:
    key = int(read_reg('esi')) & 0xffffffff
    idx = int(read_reg('edx'))
    if idx < 0:
        break

    print("key 0x%x idx 0x%x"%(key, idx))
    fd.write("key 0x%x idx 0x%x\n"%(key, idx))
    gdb.execute("continue")

which produces a trace like that:

key 0xad23773b idx 0x38c
key 0x92d26427 idx 0x3c3
key 0x1411f6c3 idx 0x3ff
key 0x69cbae50 idx 0x446
key 0x7332a93 idx 0x477
key 0x93efb281 idx 0x4ad
key 0xe23f1d80 idx 0x4e6
key 0xceb1956e idx 0x517
key 0xab7f0423 idx 0x54a
key 0x565118bb idx 0x57f
key 0xd99d45a idx 0x5b7
key 0x5601610d idx 0x5ef
key 0xb38a2126 idx 0x62a
key 0x1cf5510b idx 0x670
key 0x83125f0c idx 0x6a0
key 0xd48c0a3a idx 0x6d9
key 0xe89cc9cd idx 0x4

which can be used by this script to decrypt and disassemble the code:

import sys
from pwn import *
from capstone import *

# encrypted code region
CODE_OFFSET = 0xc0a0
CODE_SIZE   = 0x10000

def disasm(code):
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    for i in md.disasm(code, 0x0):
        print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))


def dec(data, key, sz):
    key = p32(key)
    size = data[sz*4]   # read size
    code = xor(key, data[(sz+1)*4:(sz+1+size)*4])
    disasm(code)

# read encrypted code from binary
with open(sys.argv[1], "rb") as fp:
    fp.seek(CODE_OFFSET)
    data = fp.read(CODE_SIZE)

with open("trace_crack2.txt") as fp:
    for line in fp:
        line = line.split()

        key = int(line[1][2:], 16)
        idx = int(line[3][2:], 16)

        print("\n\n\n")
        print("bloc: 0x%x   0x%x"%(idx, key))
        dec(data, key, idx)

the decrypted code clearly looks obfuscated:

bloc: 0x38c   0xad23773b
0x0:    mov rdx, 0x3689bc0d
0x7:    movabs  rdi, 0x864e55d5
0x11:   xor rdi, rdx
0x14:   mov rbx, 0x1471e32d
0x1b:   sub rbx, rdi
0x1e:   movabs  r10, 0xe9d3019e
0x28:   sub r10, rbx
0x2b:   mov rsi, 0x9f3a96e
0x32:   sub rsi, r10
0x35:   mov rbx, 0x5bcaf507
0x3c:   xor rbx, rsi
0x3f:   movabs  r10, 0xafdf5a79
0x49:   xor r10, rbx
0x4c:   mov r8, 0x2a3bfde6
0x53:   add r8, r10
0x56:   mov rsi, 0x32b2ae6b
0x5d:   sub rsi, r8
0x60:   movabs  rax, 0x19097a1e9
0x6a:   xor rax, rsi
0x6d:   movabs  rdx, 0xdd66c684
0x77:   movabs  rsi, 0xe89e6c60
0x81:   add rsi, rdx
0x84:   mov rbx, 0x147e8930
0x8b:   xor rbx, rsi
0x8e:   movabs  rsi, 0xfbd886c4
0x98:   xor rsi, rbx
0x9b:   movabs  r9, 0xcb56fcb7
0xa5:   xor r9, rsi
0xa8:   mov rbx, 0x3e2bc149
0xaf:   xor rbx, r9
0xb2:   mov rdx, 0x3231b3f7
0xb9:   add rdx, rbx
0xbc:   movabs  r11, 0xad2afb66
0xc6:   sub r11, rdx
0xc9:   movabs  rcx, 0x1f4b71da6
0xd3:   add rcx, r11
0xd6:   ret

and we can find the ptrace call in there:

bloc: 0x6d9   0xd48c0a3a
0x0:    xor edi, edi
0x2:    push    0x65        ; ptrace syscall number
0x4:    pop rax
0x5:    syscall
0x7:    mov rsi, rax
0xa:    movabs  r10, 0xc9d392ed
0x14:   movabs  rbx, 0xf21ee895

ptrace will return 0 upon success, so one way to circumvent it is to change syscall to xor eax, eax.

Both instructions are the same size:

% rasm2 -b64 'syscall'
0f05
% rasm2 -b64 "xor eax, eax"
31c0

we can use the following script to patch a new binary (not forgetting the XOR):

from pwn import p32

idx = 0x6d9 + 1
xor_key = p32(0xd48c0a3a)
file_offset = 0xc0a0 + idx*4

data = bytearray(open("crackme2", 'rb').read())

# syscall -> xor eax,eax
data[file_offset+5] = 0x31 ^ xor_key[1]
data[file_offset+6] = 0xc0 ^ xor_key[2]

open('crackme2.patch', 'wb').write(data)

if we dump the encrypted code blocs from the patched binary we can validate the change:

bloc: 0x6d9   0xd48c0a3a
0x0:    xor    edi, edi
0x2:    push    0x65
0x4:    pop    rax
0x5:    xor    eax, eax     ; here
0x7:    mov    rsi, rax
0xa:    movabs    r10, 0xc9d392ed
0x14:    movabs    rbx, 0xf21ee895
0x1e:    sub    rbx, r10
0x21:    movabs    r8, 0xa019c58c
0x2b:    xor    r8, rbx

if we re-run the GDB trace script on the patched binary, the generated trace is much longer and that’s now something we can work on

3. Flag in the haystack

The initial trace was executing 17 encrypted blocs, the new one goes over 125

% wc -l trace_crack2.txt.*
  17 trace_crack2.txt.1
 125 trace_crack2.txt.2
 142 total

that’s both a good and bad news because the assembly is obfuscated by a lot of arithmetic constants unfolding and we need to find how the flag is checked.

We can also generate a new disassembly out of this trace using the same script as before:

% ./dump.py crackme2.patch > trace2.txt

At some point the code needs to load the input flag if it wants to check. When the decrypted code is called, debugging shows no obvious reference to the input string in any register, however we know from the main function that it’s been copied to the stack.

We can see some blocks are loading an address relative to the stack, which is already a good lead:

% egrep "bloc|rbp" trace2.txt
[...]
bloc: 0x8d3   0xe2b797ea
0x42:	lea	rdx, [rbp + rdx]
bloc: 0x97b   0xfe5ddd5e
0x42:	lea	rbx, [rbp + rbx]
bloc: 0xa14   0x5eb21c2c
0x4b:	lea	rdx, [rbp + rdx]
bloc: 0xab3   0x44c6f479
0x42:	lea	r10, [rbp + r10]
[...]

also there must be some kind of conditionnal instruction somewhere to trigger the good or bad message.

Some block contain a cmp instruction, but they are just decoys and here for more confusion. The before last block of the new trace is interesting as it ends with:

0x1e1:  test    r12, r12
0x1e4:  cmove   rdx, rbx
0x1e8:  mov rcx, rdx
0x1eb:  ret

The CMOVE instruction will move rbx to rdx if r12 == 0. RDX then ends up in ECX which holds the index of the next bloc to run.

If we debug the binary and force r12 to be 0 at this stage, we end up with the winning message, so in order to get the correct flag, we need to end up with r12 == 0.

% grep r12 trace2.txt
0x91:	xor	r12, r12
0xfb:	or	r12, rdx
0x12c:	or	r12, rdx
0xf5:	or	r12, r11
0x109:	or	r12, rdx
0x115:	or	r12, r8
0x106:	or	r12, rax
0xfa:	or	r12, r10
0x126:	or	r12, rdx
0x10b:	or	r12, rcx
0x108:	or	r12, r9
0x10b:	or	r12, rdx
0x104:	or	r12, r8
0x119:	or	r12, rdi
0x126:	or	r12, r10
0x11c:	or	r12, r9
0x118:	or	r12, rsi
0x10c:	or	r12, rdx
0x10e:	or	r12, rbx
0x119:	or	r12, r10
0x10e:	or	r12, r10
0x118:	or	r12, rdx
0xf1:	or	r12, rdi
0x10f:	or	r12, rax
0x109:	or	r12, rbx
0x128:	or	r12, rax
0x119:	or	r12, r10
0xfb:	or	r12, rbx
0x129:	or	r12, rcx
0xfa:	or	r12, r11
0x119:	or	r12, r11
0x119:	or	r12, rdi
0x108:	or	r12, rcx
0x124:	or	r12, rbx
0x11e:	or	r12, rbx
0x118:	or	r12, rsi
0x12c:	or	r12, r10
0x13c:	or	r12, rbx
0xfe:	or	r12, r8
0x119:	or	r12, r10
0x122:	or	r12, r10
0xfb:	test	r12, r12
0x1e1:	test	r12, r12

it’s first initialized to 0 and sometimes or’d with another register (which must zero as well if we want it to remain zero).

starring at the screen, we can notice a recurring pattern in some blocks, like this one:

bloc: 0x165c   0x803ed377
;
;  /// skipped some instructions
;
; load input flag string 
0x45:   lea r9, [rbp + r9]
;
; /// skipped more instructions
;
; load char 'r9' from the input flag string
0xbb:   movzx   rax, byte ptr [r9 + rbx]
;
; /// skipped even more instructions
;
; do a or with r12
0xfb:   or  r12, rdx

so if can we find these blocks, get the character index that being checked (from the movzx instruction) and then manage to get 0 in the or’d register with r12, we should be good to go.

4. Poor man’s ugly emulation

As i’m not familiar enough with miasm or Triton to quickly do what i wanted in my limited available time, i went down the quick and dirty road:

    def emu(self, code):
        print("######################")

        st = {'r12': 0}

        md = Cs(CS_ARCH_X86, CS_MODE_64)
        for i in md.disasm(code, 0x0):
            op = i.mnemonic
            dat =  i.op_str.replace(",", "").split()
            #print(op, dat)
            match i.mnemonic:
                case 'mov':
                    st[dat[0]] = int(dat[1].replace("0x", ""), 16) & 0xffffffff
                case 'movabs':
                    st[dat[0]] = int(dat[1].replace("0x", ""), 16) & 0xffffffff
                case 'sub':
                    st[dat[0]] -= st[dat[1]]
                    st[dat[0]] &= 0xffffffff
                case 'add':
                    st[dat[0]] += st[dat[1]]
                    st[dat[0]] &= 0xffffffff
                case 'xor':
                    st[dat[0]] ^= st[dat[1]]
                case 'or':
                    st[dat[0]] |= st[dat[1]]
                    if dat[0] == 'r12':
                        # r12 persist and must be 0 for last check to pass
                        #print(" ---------- R12")
                        #print(st[dat[1]])
                        print(st['r12'])
                        # solve dat[1] == 0
                        result = self.slv(st[dat[1]], 0)
                        self.out[pos] = result
                        print(self.out)
                case 'lea':
                    # load flag
                    if st[dat[0]] == 16488:
                        print("load flag")
                        st[dat[0]] = self.flag
                case 'movzx':
                    # load flag chr index
                    o1 = dat[-1][:-1]
                    o2 = dat[-3][1:]
                    if type(st[o1]) is list:
                        pos = st[o2]
                        st[dat[0]] = st[o1][pos]
                    elif type(st[o2]) is list:
                        pos = st[o1]
                        st[dat[0]] = st[o2][pos]
                    else:
                        print(op, dat)
                        print(st)
                        raise
                    print("checking flag pos %d"%pos)

I used z3 to symbolize the input flag and then solve the contrains to keep r12 == 0 (which turned out to be quite simple…)

Full script here.

Running the script yields the following output:

% python emu.py
bloc: 0x801   0x3c23c429
######################
bloc: 0x839   0x47569508
######################
[...]
######################
load flag
checking flag pos 5
0 | 100 - f5 & 255
bytearray(b'\x00\x00\x00\x00{d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
bloc: 0x18d3   0x22b364d3
######################
load flag
checking flag pos 39
0 | 125 - f39 & 255
bytearray(b'\x00\x00\x00\x00{d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00}\x00')
bloc: 0x195d   0xc295cc42
######################
bloc: 0x19b5   0xd7f207a3
######################
load flag
checking flag pos 18
0 | 95 - f18 & 255
bytearray(b'\x00\x00\x00\x00{d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00_\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00}\x00')
bloc: 0x1a46   0x545b9046
######################
load flag
checking flag pos 9
0 | 121 - f9 & 255
bytearray(b'\x00\x00\x00\x00{d\x00\x00\x00y\x00\x00\x00\x00\x00\x00\x00\x00_\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00}\x00')

[....]

######################
load flag
checking flag pos 17
0 | 51 - f17 & 255
bytearray(b'grey{d1d_y0u_s0lv3_b\x00_emul4t1ng?_1e4b8a}\x00')
bloc: 0x3e51   0x193272c7
######################
load flag
checking flag pos 7
bloc: 0x3ee7   0xd6d30c89
######################
load flag
checking flag pos 20
0 | 121 - f20 & 255
bytearray(b'grey{d1d_y0u_s0lv3_by_emul4t1ng?_1e4b8a}\x00')
bloc: 0x3f84   0xd25edc37
######################

5. Files

binary: crackme2
gdb trace script: trace.py
disassembler: dump.py
ptrace patcher: patch.py
emulator: emu.py
execution trace before ptrace patch: trace_crack2_1.txt + disassembly trace1.txt
execution trace after ptrace patch: trace_crack2_2.txt + disassembly trace2.txt