cover image for post 'Practical Reverse Engineering Solutions – Page 35 (Part IV)'

Practical Reverse Engineering Solutions – Page 35 (Part IV)

my go at exercises 8 and 9 on page 35

This blog post presents my solution to exercises 8 and 9 on page 35 from the book Practical Reverse Engineering by Bruce Dang, Alexandre Gazet and Elias Bachaalany (ISBN: 1118787315). The book is my first contact with reverse engineering, so take my statements with a grain of salt. All code snippets are on GitHub. For an overview of my solutions consult this progress page.

Exercise 8

Sample H. Decompile sub_11732 and explain the most likely programming construct used in the original code.

Disassembly

This is the disassembly as generated by IDA. The malware sample has a 4 byte offset compared to the references of the book – so sub_11732 is in fact sub_1172E.

; =============== S U B R O U T I N E =======================================
; sub_1172E: dispatches on its single stack argument (1, 2, 3 or 12). For a
; recognised value it stores a halved dword read relative to eax through ecx
; and an advanced eax through edx; for any other value it stores nothing.
; eax, ecx and edx are used without being set here, so they act as inputs.
.text:0001172E
.text:0001172E
.text:0001172E sub_1172E       proc near               ; CODE XREF: sub_11798+115p
.text:0001172E                                         ; sub_11798+157p
.text:0001172E
.text:0001172E arg_0           = dword ptr  4
.text:0001172E
.text:0001172E                 push    esi             ; save esi (restored at loc_1176B)
.text:0001172F                 mov     esi, [esp+4+arg_0] ; esi = stack argument (the selector)
.text:00011733                 dec     esi
.text:00011734                 jz      short loc_1175F ; selector == 1
.text:00011736                 dec     esi
.text:00011737                 jz      short loc_11755 ; selector == 2
.text:00011739                 dec     esi
.text:0001173A                 jz      short loc_1174B ; selector == 3
.text:0001173C                 sub     esi, 9
.text:0001173F                 jnz     short loc_1176B ; selector != 12: leave without storing
.text:00011741                 mov     esi, [eax+8]    ; selector == 12: esi = dword at eax+8
.text:00011744                 shr     esi, 1          ; logical shift: unsigned halving
.text:00011746                 add     eax, 0Ch        ; advance eax by 12 bytes
.text:00011749                 jmp     short loc_11767
.text:0001174B ; ---------------------------------------------------------------------------
.text:0001174B
.text:0001174B loc_1174B:                              ; CODE XREF: sub_1172E+Cj
.text:0001174B                 mov     esi, [eax+3Ch]  ; selector == 3: esi = dword at eax+3Ch
.text:0001174E                 shr     esi, 1
.text:00011750                 add     eax, 5Eh        ; advance eax by 94 bytes
.text:00011753                 jmp     short loc_11767
.text:00011755 ; ---------------------------------------------------------------------------
.text:00011755
.text:00011755 loc_11755:                              ; CODE XREF: sub_1172E+9j
.text:00011755                 mov     esi, [eax+3Ch]  ; selector == 2: esi = dword at eax+3Ch
.text:00011758                 shr     esi, 1
.text:0001175A                 add     eax, 44h        ; advance eax by 68 bytes
.text:0001175D                 jmp     short loc_11767
.text:0001175F ; ---------------------------------------------------------------------------
.text:0001175F
.text:0001175F loc_1175F:                              ; CODE XREF: sub_1172E+6j
.text:0001175F                 mov     esi, [eax+3Ch]  ; selector == 1: esi = dword at eax+3Ch
.text:00011762                 shr     esi, 1
.text:00011764                 add     eax, 40h        ; advance eax by 64 bytes; falls through
.text:00011767
.text:00011767 loc_11767:                              ; CODE XREF: sub_1172E+1Bj
.text:00011767                                         ; sub_1172E+25j ...
.text:00011767                 mov     [ecx], esi      ; store the halved value through ecx
.text:00011769                 mov     [edx], eax      ; store the advanced address through edx
.text:0001176B
.text:0001176B loc_1176B:                              ; CODE XREF: sub_1172E+11j
.text:0001176B                 pop     esi             ; restore esi
.text:0001176C                 retn    4               ; return and release the 4-byte stack argument
.text:0001176C sub_1172E       endp

C-Code

The disassembly is very easy to decompile. This is the non simplified version that results from a one-to-one translation of the disassembly:

/*
 * One-to-one translation of sub_1172E.
 *
 * arg_1 (eax): base address of the structure that is inspected.
 * arg_2 (edx): receives the base address advanced by a per-case byte count.
 * arg_3 (ecx): receives the halved value of the member that was read.
 * arg_4 (stack): selector; only 1, 2, 3 and 12 are handled.
 *
 * Returns the content of eax: the advanced address for a handled selector,
 * the unchanged arg_1 otherwise (in which case nothing is stored).
 */
int __userpurge sub_1172E(struct _arg_1 *arg_1, struct _arg_1 **arg_2,
                int *arg_3, int arg_4)
{
    unsigned int tmp;
    /* The disassembly adds raw byte counts to eax, so the arithmetic has to
     * be done on a char pointer; "arg_1 += 64" on a struct pointer would
     * advance by 64 * sizeof(struct _arg_1). */
    char *next = (char *)arg_1;

    switch ( arg_4 )
    {
    case 1:
        /* shr is a logical shift, hence the unsigned operand */
        tmp = (unsigned int)arg_1->off_3Ch >> 1;
        next += 0x40;
        break;
    case 2:
        tmp = (unsigned int)arg_1->off_3Ch >> 1;
        next += 0x44;
        break;
    case 3:
        tmp = (unsigned int)arg_1->off_3Ch >> 1;
        next += 0x5E;
        break;
    default:
        if ( arg_4 != 12 )
            return (int)arg_1;      /* eax untouched, no stores */
        tmp = (unsigned int)arg_1->off_8h >> 1;
        next += 0x0C;
        break;
    }
    *arg_3 = (int)tmp;
    *arg_2 = (struct _arg_1 *)next;
    return (int)next;
}

The code can be simplified to:

/*
 * Simplified version of sub_1172E.
 *
 * Depending on the selector arg_4 (1, 2, 3 or 12) the function stores half
 * of one structure member in *arg_3 and the structure address advanced by a
 * per-case number of bytes in *arg_2. Any other selector stores nothing.
 *
 * Returns the advanced address, or the unchanged arg_1 for an unhandled
 * selector.
 */
int __userpurge sub_1172E(struct _arg_1 *arg_1, struct _arg_1 **arg_2,
        int *arg_3, int arg_4)
{
    int advance;    /* number of BYTES added to the structure address */

    switch ( arg_4 )
    {
    case 1:
        advance = 64;
        break;
    case 2:
        advance = 68;
        break;
    case 3:
        advance = 94;
        break;
    case 12:
        advance = 12;
        break;
    default:
        return (int)arg_1;
    }

    /* The member must be read before the address is advanced, and the
     * choice of member depends on the selector, not on the pointer.
     * The disassembly uses shr, i.e. an unsigned halving. */
    if( arg_4 == 12 )
        *arg_3 = (int)((unsigned int)arg_1->off_8h >> 1);
    else
        *arg_3 = (int)((unsigned int)arg_1->off_3Ch >> 1);

    /* byte-wise arithmetic: the disassembly adds raw offsets to eax */
    arg_1 = (struct _arg_1 *)((char *)arg_1 + advance);
    *arg_2 = arg_1;
    return (int)arg_1;
}

The struct _arg_1 has at least two members at offset 8h and 3Ch respectively:

/*
 * Partial layout of the structure accessed by sub_1172E. Only the two
 * members the routine reads are known; the rest is opaque padding that keeps
 * them at the offsets used in the disassembly (assumes a 4-byte int, as in
 * the 32-bit sample).
 */
struct _arg_1 {
    unsigned char unknown_00h[0x08];    /* 8 bytes of other members */
    int off_8h;                         /* read at [eax+8] */
    unsigned char unknown_0Ch[0x30];    /* 48 bytes of other members */
    int off_3Ch;                        /* read at [eax+3Ch] */
} arg_1;

To answer the question from the book, the snippet probably results from a switch statement.

Exercise 9

Sample L. Explain what function sub_1000CEA0 does and then decompile it back to C.

Disassembly

This is the disassembly generated by IDA Pro:

; ---------------------------------------------------------------------------
.text:1000CE9A                 align 10h
.text:1000CEA0
.text:1000CEA0 ; =============== S U B R O U T I N E =======================================
.text:1000CEA0 ; sub_1000CEA0: scans the byte string at arg_0 forward to its zero byte,
.text:1000CEA0 ; then scans backward for the byte in arg_4. Returns in eax the address of
.text:1000CEA0 ; the match closest to the end, or 0 when the byte does not occur.
.text:1000CEA0
.text:1000CEA0 ; Attributes: bp-based frame
.text:1000CEA0
.text:1000CEA0 sub_1000CEA0    proc near               ; CODE XREF: sub_10007A4B+1D7p
.text:1000CEA0                                         ; sub_1000AD4D+3Ap ...
.text:1000CEA0
.text:1000CEA0 arg_0           = dword ptr  8
.text:1000CEA0 arg_4           = byte ptr  0Ch
.text:1000CEA0
.text:1000CEA0                 push    ebp
.text:1000CEA1                 mov     ebp, esp
.text:1000CEA3                 push    edi             ; save edi (restored before return)
.text:1000CEA4                 mov     edi, [ebp+arg_0] ; edi = start of the string
.text:1000CEA7                 xor     eax, eax        ; al = 0: the byte to scan for
.text:1000CEA9                 or      ecx, 0FFFFFFFFh ; ecx = -1: effectively unbounded count
.text:1000CEAC                 repne scasb             ; scan forward until a zero byte is found
.text:1000CEAE                 add     ecx, 1
.text:1000CEB1                 neg     ecx             ; ecx = bytes scanned = length incl. zero byte
.text:1000CEB3                 sub     edi, 1          ; edi back onto the zero byte
.text:1000CEB6                 mov     al, [ebp+arg_4] ; al = byte to search for
.text:1000CEB9                 std                     ; direction flag set: scasb now decrements edi
.text:1000CEBA                 repne scasb             ; scan backward, at most ecx bytes
.text:1000CEBC                 add     edi, 1          ; undo the decrement of the last scasb
.text:1000CEBF                 cmp     [edi], al       ; did the scan stop on a match?
.text:1000CEC1                 jz      short loc_1000CEC7
.text:1000CEC3                 xor     eax, eax        ; no match: return 0
.text:1000CEC5                 jmp     short loc_1000CEC9
.text:1000CEC7 ; ---------------------------------------------------------------------------
.text:1000CEC7
.text:1000CEC7 loc_1000CEC7:                           ; CODE XREF: sub_1000CEA0+21j
.text:1000CEC7                 mov     eax, edi        ; match: return its address
.text:1000CEC9
.text:1000CEC9 loc_1000CEC9:                           ; CODE XREF: sub_1000CEA0+25j
.text:1000CEC9                 cld                     ; clear the direction flag set by std above
.text:1000CECA                 pop     edi
.text:1000CECB                 leave
.text:1000CECC                 retn
.text:1000CECC sub_1000CEA0    endp
.text:1000CECC

Walk-Through

► Function Prologue

The snippet starts with the standard function prologue:

push    ebp
                mov     ebp, esp

► Calculate String Length

The next lines look almost exactly like the ones from the very first exercise:

push    edi
                mov     edi, [ebp+arg_0]
                xor     eax, eax
                or      ecx, 0FFFFFFFFh
                repne scasb
                add     ecx, 1
                neg     ecx
                sub     edi, 1

The snippet starts by retrieving the first function parameter – a pointer to a null-terminated byte string. The instruction repne scasb searches within this string for the null byte stored in eax. The register ecx is initialized to -1 with or ecx, 0FFFFFFFFh and decremented each time scasb is executed. With add ecx, 1 and neg ecx we get the length of the string including the null byte. This is the only difference to exercise 1 on page 11, where the snippet calculated the length of the string without the null byte by adding 2 to ecx instead of just 1. After repne scasb the register edi points to the byte right after the null byte; with sub edi, 1 the pointer is set to point at the null byte.

So if the first function parameter were the string The pool on the roof must have a leak. then after the above instruction one has the following picture:

The pool on the roof must have a leak.0
                                      ^                                     
                                      |
                                      edi

edi points at the null byte ’\0’. The register ecx is set to the length of the string including the null byte, i.e., 39.

► Find Character From End of String

The next four lines are:

mov     al, [ebp+arg_4]
                std
                repne scasb
                add     edi, 1
  • Line 24 stores the byte passed as the second function parameter in register al. The byte probably holds a character.
  • Line 25 sets the direction flag with std. If the direction flag is set, then string instructions such as the following scasb process the string backwards, i.e., they decrement edi instead of incrementing it.
  • Line 26 searches the character in al in string edi. The search is carried out backwards and for at most ecx characters (the string length plus null byte).
  • The register edi points at the character right before the one searched for (given there is such a character in the string). Line 27 adds one byte to edi so it points at the wanted character.

Say the second function parameter is o, then the snippet will leave us at:

The pool on the roof must have a leak.0
                  ^                                     
                  |
                  edi

► If Character is Found: Return Reference

The next few lines check if the character was actually found, i.e., edi points at the same character as in al. If so, the routine returns edi, i.e., a pointer to the (last) occurrence of the character passed as the second function parameter in the string referenced by the first function parameter:

cmp     [edi], al
                jz      short loc_1000CEC7
                (...)
loc_1000CEC7:                           ; CODE XREF: sub_1000CEA0+21j
                mov     eax, edi

loc_1000CEC9:                           ; CODE XREF: sub_1000CEA0+25j
                cld
                pop     edi
                leave
                retn

► If Character is not Found: Return Null

If there is no character al in the string, then edi will point at the first character of the string (which differs from the search character). The jump in line 29 is therefore not taken and the snippet returns NULL.

xor     eax, eax
                jmp     short loc_1000CEC9
                (...)
loc_1000CEC9:                           ; CODE XREF: sub_1000CEA0+25j
                cld
                pop     edi
                leave
                retn

Summary: The snippet implements the C function strrchr, which searches for the last occurrence of a character in a string.

C-Code

This C-code results from a one-to-one translation of the disassembly:

/*
 * One-to-one translation of sub_1000CEA0.
 *
 * Returns a pointer to the last occurrence of ch in the null-terminated
 * string cp (the terminator itself counts as part of the string), or 0 if
 * ch does not occur.
 */
char* strrchr(const char *cp, char ch) {
    int len = 0;

    /* forward "repne scasb": count every byte including the terminator;
     * afterwards cp points one byte past the terminator */
    do
    {
        len++;
    } while ( *cp++ );

    /* backward "repne scasb": inspect exactly len bytes, starting at the
     * terminator. Stepping back before each comparison mirrors "sub edi, 1"
     * and keeps cp from ever reading past the terminator or moving before
     * the first character. */
    while( len > 0 )
    {
        cp--;
        if( *cp == ch )
            return (char *)cp;
        len--;
    }
    return 0;
}

A much more elegant version is provided for instance by Apple:

/*
 * Compact single-pass strrchr.
 *
 * Returns a pointer to the last occurrence of ch (converted to char) in the
 * null-terminated string cp, or a null pointer if it does not occur.
 * Adapted from the quoted implementation so that the terminator is examined
 * as well: searching for '\0' yields a pointer to the terminator, which is
 * also what the disassembled routine does.
 */
char * strrchr(const char *cp, int ch)
{
    char *save = (char *) 0;
    /* compare as char so that values above 127 match when char is signed */
    const char wanted = (char) ch;

    for (;; cp++) {
        if (*cp == wanted)
            save = (char *) cp;
        /* test for the end only after the comparison above */
        if (*cp == '\0')
            return save;
    }
}