Reverse engineering 32 and 64 bits binaries with Radare2 - 10 (pointers and dynamic memory)

Reverse engineering 32 and 64 bits binaries with Radare2 - 10 (pointers and dynamic memory)

Hello I’m Artik Blue and today you will finally understand C pointers.

Let’s start with this code

#include <stdio.h>
#include <stdlib.h>

/*
 * Pointer demo: prints the value and the address of an int and a char,
 * then modifies the char through a pointer and prints it again.
 * Returns 0.
 */
int main(){

    /* Plain int, printed below by value (%d) and by address (%p). */
    int i = 2;

    /* Plain char, initially 'c'. */
    char c = 'c';

    /* pc stores the ADDRESS of c, not its content. */
    char* pc = &c;

    printf("Value of i: %d \n",i);
    printf("Address of i: %p \n",&i);
    printf("Value of c: %c \n", c);
    printf("Address of c: %p \n",&c);

    printf("Updating the content of the mem address pointed by pc \n");

    /* Write through the pointer: the byte at the address held in pc is c itself. */
    *pc = 'b';

    /* c is read directly here; it now holds 'b' because of the write above. */
    printf("Value of c: %c \n", c);

return 0;
}

In C, &c yields the address of the variable c. A declaration such as char* pc declares a pointer, and *pc accesses the memory that pointer refers to. We can have multiple pointers to the same memory location, so if that memory location is updated, every pointer to it will see the new value. If you have zero knowledge about pointers, have a quick read of this one: https://www.programiz.com/c-programming/c-pointers and then go on with this post.

So when compiled and run, the program outputs:

Value of i: 2 
Address of i: 0x7ffe948d662c 
Value of c: c 
Address of c: 0x7ffe948d662b 
Updating the content of the mem address pointed by pc 
Value of c: b 

And now the disasm:

[0x556609210155]> pdf
            ; DATA XREF from entry0 @ 0x55660921008d
┌ 210: int main (int argc, char **argv, char **envp);
│           ; var int64_t var_15h @ rbp-0x15
│           ; var int64_t var_14h @ rbp-0x14
│           ; var int64_t var_10h @ rbp-0x10
│           ; var int64_t var_8h @ rbp-0x8
│           0x556609210155      55             push rbp
│           0x556609210156      4889e5         mov rbp, rsp
│           0x556609210159      4883ec20       sub rsp, 0x20
│           0x55660921015d      64488b042528.  mov rax, qword fs:[0x28]
│           0x556609210166      488945f8       mov qword [var_8h], rax
│           0x55660921016a      31c0           xor eax, eax
│           0x55660921016c      c745ec020000.  mov dword [var_14h], 2
│           0x556609210173      c645eb63       mov byte [var_15h], 0x63 ; 'c' ; 99
│           0x556609210177      488d45eb       lea rax, [var_15h]
│           0x55660921017b      488945f0       mov qword [var_10h], rax
│           0x55660921017f      8b45ec         mov eax, dword [var_14h]
│           0x556609210182      89c6           mov esi, eax
│           0x556609210184      488d3d7d0e00.  lea rdi, str.Value_of_i:__d ; 0x556609211008 ; "Value of i: %d \n"
│           0x55660921018b      b800000000     mov eax, 0
│           0x556609210190      e8bbfeffff     call sym.imp.printf     ; int printf(const char *format)
│           0x556609210195      488d45ec       lea rax, [var_14h]
│           0x556609210199      4889c6         mov rsi, rax
│           0x55660921019c      488d3d760e00.  lea rdi, str.Address_of_i:__p ; 0x556609211019 ; "Address of i: %p \n"
│           0x5566092101a3      b800000000     mov eax, 0
│           0x5566092101a8      e8a3feffff     call sym.imp.printf     ; int printf(const char *format)
│           0x5566092101ad      0fb645eb       movzx eax, byte [var_15h]
│           0x5566092101b1      0fbec0         movsx eax, al
│           0x5566092101b4      89c6           mov esi, eax
│           0x5566092101b6      488d3d6f0e00.  lea rdi, str.Value_of_c:__c ; 0x55660921102c ; "Value of c: %c \n"
│           0x5566092101bd      b800000000     mov eax, 0
│           0x5566092101c2      e889feffff     call sym.imp.printf     ; int printf(const char *format)
│           0x5566092101c7      488d45eb       lea rax, [var_15h]
│           0x5566092101cb      4889c6         mov rsi, rax
│           0x5566092101ce      488d3d680e00.  lea rdi, str.Address_of_c:__p ; 0x55660921103d ; "Address of c: %p \n"
│           0x5566092101d5      b800000000     mov eax, 0
│           0x5566092101da      e871feffff     call sym.imp.printf     ; int printf(const char *format)
│           0x5566092101df      488d3d6a0e00.  lea rdi, str.Updating_the_content_of_the_mem_address_pointed_by_pc ; 0x556609211050 ; "Updating the content of the mem address pointed by pc "
│           0x5566092101e6      e845feffff     call sym.imp.puts       ; int puts(const char *s)
│           0x5566092101eb      488b45f0       mov rax, qword [var_10h]
│           0x5566092101ef      c60062         mov byte [rax], 0x62    ; 'b'
│                                                                      ; [0x62:1]=255 ; 98
│           0x5566092101f2      0fb645eb       movzx eax, byte [var_15h]
│           0x5566092101f6      0fbec0         movsx eax, al
│           0x5566092101f9      89c6           mov esi, eax
│           0x5566092101fb      488d3d2a0e00.  lea rdi, str.Value_of_c:__c ; 0x55660921102c ; "Value of c: %c \n"
│           0x556609210202      b800000000     mov eax, 0
│           0x556609210207      e844feffff     call sym.imp.printf     ; int printf(const char *format)
│           0x55660921020c      b800000000     mov eax, 0
│           0x556609210211      488b55f8       mov rdx, qword [var_8h]
│           0x556609210215      644833142528.  xor rdx, qword fs:[0x28]
│       ┌─< 0x55660921021e      7405           je 0x556609210225
│       │   0x556609210220      e81bfeffff     call sym.imp.__stack_chk_fail ; void __stack_chk_fail(void)
│       └─> 0x556609210225      c9             leave
└           0x556609210226      c3             ret
[0x556609210155]> 

Hey ho! Let’s dissect that. At first the program initializes those variables:

│           0x55660921016c      c745ec020000.  mov dword [var_14h], 2
│           0x556609210173      c645eb63       mov byte [var_15h], 0x63 ; 'c' ; 99
│           0x556609210177      488d45eb       lea rax, [var_15h]
│           0x55660921017b      488945f0       mov qword [var_10h], rax

So the first var is initialized with 2, then another var is initialized with 0x63, the char 'c', and then the memory address of that var (lea = load effective address), and not its content, is stored in var_10h. As we are almost sure that those first two vars correspond to the int i and the char c, we can rename them using afvn, so for now the thing will look like:

[0x556609210155]> afv
var int64_t char_c @ rbp-0x15
var int64_t int_i @ rbp-0x14
var int64_t var_10h @ rbp-0x10
var int64_t var_8h @ rbp-0x8 = stack cookie

And as var_10h will hold the ADDRESS of c we can rename it as pointer_c

[0x556609210155]> afvn pointer_c var_10h
[0x556609210155]> afv
var int64_t char_c @ rbp-0x15
var int64_t int_i @ rbp-0x14
var int64_t pointer_c @ rbp-0x10
var int64_t var_8h @ rbp-0x8

Let’s inspect the first printf

│           0x55660921017f      8b45ec         mov eax, dword [int_i]
│           0x556609210182      89c6           mov esi, eax
│           0x556609210184      488d3d7d0e00.  lea rdi, str.Value_of_i:__d ; 0x556609211008 ; "Value of i: %d \n"
│           0x55660921018b      b800000000     mov eax, 0
│           0x556609210190      e8bbfeffff     call sym.imp.printf     ; int printf(const char *format)

Nothing new here, by “[ref]” we indicate that we want the content of whats pointed by int_i, so the value of the i var, we load it in eax (we don’t use rax cause we have enough space using eax only), then we load base address of the string to print “Value of…” as a parameter as well. The printf function will take those two params and do its magic as we already should know. Just note that on this first case, we are passing the value of the variable, instead of its address. Let’s move on:

│           0x556609210195      488d45ec       lea rax, [int_i]
│           0x556609210199      4889c6         mov rsi, rax
│           0x55660921019c      488d3d760e00.  lea rdi, str.Address_of_i:__p ; 0x556609211019 ; "Address of i: %p \n"
│           0x5566092101a3      b800000000     mov eax, 0
│           0x5566092101a8      e8a3feffff     call sym.imp.printf     ; int printf(const char *format)

Just look how on this next block the program does lea instead of mov. This time, the program loads the ADDRESS of int_i instead of the value located at what is pointed by int_i. So in general terms using &var internally translates to lea reg, “[var]”. This is a key concept in reverse engineering, exploit development and in general terms anything related to low level program analysis, you should have it very clear. Let’s proceed

│           0x5566092101ad      0fb645eb       movzx eax, byte [char_c]
│           0x5566092101b1      0fbec0         movsx eax, al
│           0x5566092101b4      89c6           mov esi, eax
│           0x5566092101b6      488d3d6f0e00.  lea rdi, str.Value_of_c:__c ; 0x55660921102c ; "Value of c: %c \n"
│           0x5566092101bd      b800000000     mov eax, 0
│           0x5566092101c2      e889feffff     call sym.imp.printf     ; int printf(const char *format)

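Nothing worth commenting here, the VALUE is loaded: movzx reads the single byte of char_c into eax (zero-extending it), and movsx then sign-extends al to a 32 bit int, because a char is promoted to int when it is passed to printf.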

│           0x5566092101c7      488d45eb       lea rax, [char_c]
│           0x5566092101cb      4889c6         mov rsi, rax
│           0x5566092101ce      488d3d680e00.  lea rdi, str.Address_of_c:__p ; 0x55660921103d ; "Address of c: %p \n"
│           0x5566092101d5      b800000000     mov eax, 0
│           0x5566092101da      e871feffff     call sym.imp.printf     ; int printf(const char *format)

Then, again, the ADDRESS is loaded this time.

But at the end of the program we have this:

│           0x5566092101eb      488b45f0       mov rax, qword [pointer_c]
│           0x5566092101ef      c60062         mov byte [rax], 0x62    ; 'b'
│                                                                      ; [0x62:1]=255 ; 98
│           0x5566092101f2      0fb645eb       movzx eax, byte [char_c]
│           0x5566092101f6      0fbec0         movsx eax, al
│           0x5566092101f9      89c6           mov esi, eax
│           0x5566092101fb      488d3d2a0e00.  lea rdi, str.Value_of_c:__c ; 0x55660921102c ; "Value of c: %c \n"
│           0x556609210202      b800000000     mov eax, 0
│           0x556609210207      e844feffff     call sym.imp.printf     ; int printf(const char *format)

What happens here? The contents of pointer_c are moved to rax, then the content of what is pointed by rax is updated with ‘b’ and then we forget about pointer_c and we just printf what is pointed by char_c (???) Debugging is quite useful for understanding what is happening here.

So we start like this:

│           0x5566092101eb b    488b45f0       mov rax, qword [pointer_c]
│           0x5566092101ef      c60062         mov byte [rax], 0x62    ; 'b'
│                                                                      ; [0x62:1]=255 ; 98
│           0x5566092101f2      0fb645eb       movzx eax, byte [char_c]
│           0x5566092101f6      0fbec0         movsx eax, al
│           0x5566092101f9      89c6           mov esi, eax
│           0x5566092101fb      488d3d2a0e00.  lea rdi, str.Value_of_c:__c ; 0x55660921102c ; "Value of c: %c \n"
│           0x556609210202      b800000000     mov eax, 0
│           0x556609210207      e844feffff     call sym.imp.printf     ; int printf(const char *format)
[...]
[0x5566092101eb]> afvd
var var_8h = 0x7ffd71635228 = (qword)0xa0af6487b4e4ef00
var int_i = 0x7ffd7163521c = (qword)0x7163521b00000002
var char_c = 0x7ffd7163521b = (qword)0x63521b0000000263
var pointer_c = 0x7ffd71635220 = (qword)0x00007ffd7163521b

As you can see, the content of pointer_c (0x00007ffd7163521b) is the mem address of char_c (0x7ffd7163521b) this is how pointers work, pointers store memory addresses.

So after we do the first mov, rax will look like this:

[0x5566092101eb]> ds
[0x5566092101ef]> dr rax
0x7ffd7163521b

Now rax holds the CONTENT of pointer_c and the content of pointer_c is the ADDRESS of char c, so doing “[rax]” is the same as doing “[char_c]”. And char_c contains a ‘c’ char:

[0x5566092101ef]> afvd
var var_8h = 0x7ffd71635228 = (qword)0xa0af6487b4e4ef00
var int_i = 0x7ffd7163521c = (qword)0x7163521b00000002
var char_c = 0x7ffd7163521b = (qword)0x63521b0000000263
var pointer_c = 0x7ffd71635220 = (qword)0x00007ffd7163521b

[0x5566092101ef]> pxw @ 0x7ffd7163521b
0x7ffd7163521b  0x00000263 0x63521b00 0x007ffd71 0xe4ef0000  c.....Rcq.......

And after doing the second mov

[0x5566092101ef]> ds
[0x5566092101f2]> pxw @ 0x7ffd7163521b
0x7ffd7163521b  0x00000262 0x63521b00 0x007ffd71 0xe4ef0000  b.....Rcq.......

Now it contains a ‘b’ char.

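Then the program continues, this time it loads char_c but that’s ok cause the address of char_c is exactly the value that rax held (the content of pointer_c), so the write through “[rax]” changed the very same byte that “[char_c]” reads!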

And we are done with this first example, let’s move on with something more complex but easy as well :)

Remember the program we did on one of the past tutorials? The one that was related to an array that needed to be filled by a user in a while loop. Let’s inspect this one:

#include <stdio.h>
 
/*
 * Reads a count (re-asking while it is above 100), then that many integers
 * into a fixed-size local array, and prints their sum.
 */
main() {
  int data[100];      /* fixed-size local array: room for 100 ints */
  int entered;    /* how many numbers the user wants to enter */
  int i;           /* loop index */
  long sum=0;     /* running total of the entered numbers */
 
  /* do-while: the body runs at least once, the condition is tested afterwards. */
  do {
    printf("How many numbers? ");
    scanf("%d", &entered);
    if (entered>100)  
      printf("Limit is 100");
  } while (entered>100);  
 
  
  /* Fill data[0..entered-1]; &data[i] is the address of the i-th element. */
  for (i=0; i<entered; i++) {
    printf("Enter number %d: ", i+1);
    scanf("%d", &data[i]);
  }
 
  
  /* Accumulate every stored value into sum. */
  for (i=0; i<entered; i++) 
    sum += data[i];
 
  printf("SUM: %ld\n", sum);
}

We should already know how the program works, no mystery, no pointers used here, just static memory. Let’s see:

[0x55c2a9231155]> pdf
            ; DATA XREF from entry0 @ 0x55c2a923108d
┌ 335: int main (int argc, char **argv, char **envp);
│           ; var int64_t var_1b0h @ rbp-0x1b0
│           ; var int64_t var_1ach @ rbp-0x1ac
│           ; var int64_t var_1a8h @ rbp-0x1a8
│           ; var int64_t var_1a0h @ rbp-0x1a0
│           ; var int64_t var_8h @ rbp-0x8
│           0x55c2a9231155      55             push rbp
│           0x55c2a9231156      4889e5         mov rbp, rsp
│           0x55c2a9231159      4881ecb00100.  sub rsp, 0x1b0
│           0x55c2a9231160      64488b042528.  mov rax, qword fs:[0x28]
│           0x55c2a9231169      488945f8       mov qword [var_8h], rax
│           0x55c2a923116d      31c0           xor eax, eax
│           0x55c2a923116f      48c78558feff.  mov qword [var_1a8h], 0
│       ┌─> 0x55c2a923117a      488d3d830e00.  lea rdi, str.How_many_numbers ; 0x55c2a9232004 ; "How many numbers? "
│       ╎   0x55c2a9231181      b800000000     mov eax, 0
│       ╎   0x55c2a9231186      e8b5feffff     call sym.imp.printf     ; int printf(const char *format)
│       ╎   0x55c2a923118b      488d8550feff.  lea rax, [var_1b0h]
│       ╎   0x55c2a9231192      4889c6         mov rsi, rax
│       ╎   0x55c2a9231195      488d3d7b0e00.  lea rdi, [0x55c2a9232017] ; "%d"
│       ╎   0x55c2a923119c      b800000000     mov eax, 0
│       ╎   0x55c2a92311a1      e8aafeffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)
│       ╎   0x55c2a92311a6      8b8550feffff   mov eax, dword [var_1b0h]
│       ╎   0x55c2a92311ac      83f864         cmp eax, 0x64           ; 100
│      ┌──< 0x55c2a92311af      7e11           jle 0x55c2a92311c2
│      │╎   0x55c2a92311b1      488d3d620e00.  lea rdi, str.Limit_is_100 ; 0x55c2a923201a ; "Limit is 100"
│      │╎   0x55c2a92311b8      b800000000     mov eax, 0
│      │╎   0x55c2a92311bd      e87efeffff     call sym.imp.printf     ; int printf(const char *format)
│      └──> 0x55c2a92311c2      8b8550feffff   mov eax, dword [var_1b0h]
│       ╎   0x55c2a92311c8      83f864         cmp eax, 0x64           ; 100
│       └─< 0x55c2a92311cb      7fad           jg 0x55c2a923117a
│           0x55c2a92311cd      c78554feffff.  mov dword [var_1ach], 0
│       ┌─< 0x55c2a92311d7      eb4e           jmp 0x55c2a9231227
│      ┌──> 0x55c2a92311d9      8b8554feffff   mov eax, dword [var_1ach]
│      ╎│   0x55c2a92311df      83c001         add eax, 1
│      ╎│   0x55c2a92311e2      89c6           mov esi, eax
│      ╎│   0x55c2a92311e4      488d3d3c0e00.  lea rdi, str.Enter_number__d: ; 0x55c2a9232027 ; "Enter number %d: "
│      ╎│   0x55c2a92311eb      b800000000     mov eax, 0
│      ╎│   0x55c2a92311f0      e84bfeffff     call sym.imp.printf     ; int printf(const char *format)
│      ╎│   0x55c2a92311f5      488d8560feff.  lea rax, [var_1a0h]
│      ╎│   0x55c2a92311fc      8b9554feffff   mov edx, dword [var_1ach]
│      ╎│   0x55c2a9231202      4863d2         movsxd rdx, edx
│      ╎│   0x55c2a9231205      48c1e202       shl rdx, 2
│      ╎│   0x55c2a9231209      4801d0         add rax, rdx
│      ╎│   0x55c2a923120c      4889c6         mov rsi, rax
│      ╎│   0x55c2a923120f      488d3d010e00.  lea rdi, [0x55c2a9232017] ; "%d"
│      ╎│   0x55c2a9231216      b800000000     mov eax, 0
│      ╎│   0x55c2a923121b      e830feffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)
│      ╎│   0x55c2a9231220      838554feffff.  add dword [var_1ach], 1
│      ╎│   ; CODE XREF from main @ 0x55c2a92311d7
│      ╎└─> 0x55c2a9231227      8b8550feffff   mov eax, dword [var_1b0h]
│      ╎    0x55c2a923122d      398554feffff   cmp dword [var_1ach], eax
│      └──< 0x55c2a9231233      7ca4           jl 0x55c2a92311d9
│           0x55c2a9231235      c78554feffff.  mov dword [var_1ach], 0
│       ┌─< 0x55c2a923123f      eb1f           jmp 0x55c2a9231260
│      ┌──> 0x55c2a9231241      8b8554feffff   mov eax, dword [var_1ach]
│      ╎│   0x55c2a9231247      4898           cdqe
│      ╎│   0x55c2a9231249      8b848560feff.  mov eax, dword [rbp + rax*4 - 0x1a0]
│      ╎│   0x55c2a9231250      4898           cdqe
│      ╎│   0x55c2a9231252      48018558feff.  add qword [var_1a8h], rax
│      ╎│   0x55c2a9231259      838554feffff.  add dword [var_1ach], 1
│      ╎│   ; CODE XREF from main @ 0x55c2a923123f
│      ╎└─> 0x55c2a9231260      8b8550feffff   mov eax, dword [var_1b0h]
│      ╎    0x55c2a9231266      398554feffff   cmp dword [var_1ach], eax
│      └──< 0x55c2a923126c      7cd3           jl 0x55c2a9231241
│           0x55c2a923126e      488b8558feff.  mov rax, qword [var_1a8h]
│           0x55c2a9231275      4889c6         mov rsi, rax
│           0x55c2a9231278      488d3dba0d00.  lea rdi, str.SUM:__ld   ; 0x55c2a9232039 ; "SUM: %ld\n"
│           0x55c2a923127f      b800000000     mov eax, 0
│           0x55c2a9231284      e8b7fdffff     call sym.imp.printf     ; int printf(const char *format)
│           0x55c2a9231289      b800000000     mov eax, 0
│           0x55c2a923128e      488b4df8       mov rcx, qword [var_8h]
│           0x55c2a9231292      6448330c2528.  xor rcx, qword fs:[0x28]
│       ┌─< 0x55c2a923129b      7405           je 0x55c2a92312a2
│       │   0x55c2a923129d      e88efdffff     call sym.imp.__stack_chk_fail ; void __stack_chk_fail(void)
│       └─> 0x55c2a92312a2      c9             leave
└           0x55c2a92312a3      c3             ret
[0x55c2a9231155]> 

What I want you to note here is the following, at the very beginning of the code you see this (0x1b0 = 432):

│           0x55c2a9231159      4881ecb00100.  sub rsp, 0x1b0

You should have seen instructions like this one in many of the binaries we analyzed during our journey. What this does is basically keep some memory space in the local memory related to the function (using the stack) to hold local variables. (check this https://en.wikibooks.org/wiki/X86_Disassembly/Functions_and_Stack_Frames)

And this time in here it looks like it keeps a bit more than 400 bytes (432). If you think about it, it makes sense as according to the code, we are declaring an array of 100 ints, assuming 4 bytes per int, those 400 bytes and more make sense. It is very common to see the program keeping a bit more space than the initially needed. Watching this sub rsp at the very beginning of the function will reveal interesting information.

I’m not going to enter into a lot of details on the code, as there is almost nothing new here, you should know how to analyze it very well

What I want to note this time is the following. In this program, we are using a do-while instead of a while, we can see that here:

│       ┌─> 0x55c2a923117a      488d3d830e00.  lea rdi, str.How_many_numbers ; 0x55c2a9232004 ; "How many numbers? "
│       ╎   0x55c2a9231181      b800000000     mov eax, 0
│       ╎   0x55c2a9231186      e8b5feffff     call sym.imp.printf     ; int printf(const char *format)
│       ╎   0x55c2a923118b      488d8550feff.  lea rax, [var_1b0h]
│       ╎   0x55c2a9231192      4889c6         mov rsi, rax
│       ╎   0x55c2a9231195      488d3d7b0e00.  lea rdi, [0x55c2a9232017] ; "%d"
│       ╎   0x55c2a923119c      b800000000     mov eax, 0
│       ╎   0x55c2a92311a1      e8aafeffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)
│       ╎   0x55c2a92311a6      8b8550feffff   mov eax, dword [var_1b0h]
│       ╎   0x55c2a92311ac      83f864         cmp eax, 0x64           ; 100
│      ┌──< 0x55c2a92311af      7e11           jle 0x55c2a92311c2
│      │╎   0x55c2a92311b1      488d3d620e00.  lea rdi, str.Limit_is_100 ; 0x55c2a923201a ; "Limit is 100"
│      │╎   0x55c2a92311b8      b800000000     mov eax, 0
│      │╎   0x55c2a92311bd      e87efeffff     call sym.imp.printf     ; int printf(const char *format)
│      └──> 0x55c2a92311c2      8b8550feffff   mov eax, dword [var_1b0h]
│       ╎   0x55c2a92311c8      83f864         cmp eax, 0x64           ; 100
│       └─< 0x55c2a92311cb      7fad           jg 0x55c2a923117a

As you see, the block of code is executed at first, then after that the condition check (cmp 100) is done and the program jumps back to the top of the code or just goes on, but the block of code is executed at least once. If we do a while instead of a do while, as we know, the program will jump and do the cmp, then will go to the while block of code or not based on the cmp.

The rest of the program is no mystery, let’s move on and inspect the same program but this time using dynamic memory:

#include <stdio.h>
#include <stdlib.h>
 
/*
 * Same summing program as the fixed-array version, but the storage for the
 * numbers is requested from malloc at run time and released with free.
 */
main() {
  int* data;      /* will hold the base address returned by malloc */
  int valnum;    /* how many values the user wants to add */
  int i;          /* loop index */
  long sum=0;     /* running total of the entered values */

  /* Ask for the count and allocate; repeat while malloc returned NULL. */
  do {
    printf("How many vals you need to add? ");
    scanf("%d", &valnum);
    /* Request valnum * sizeof(int) bytes: room for valnum ints. */
    data = (int *) malloc (valnum * sizeof(int));
    if (data == NULL)  
      printf("NO SPACE AVAILABLE.");
  } while (data == NULL); 


  /* data+i is already an address (that of the i-th int), so no & is needed. */
  for (i=0; i<valnum; i++) {
    printf("ENTER NUM %d ", i+1);
    scanf("%d", data+i);
  }
 

  /* *(data+i) reads the int stored at address data+i. */
  for (i=0; i<valnum; i++) 
    sum += *(data+i);
 
  printf("SUM: %ld\n", sum);
  /* Hand the block obtained from malloc back to the allocator. */
  free(data);
}

This is new! So the program at first, will ask the user how many values it wants to add, then it will do this:

data = (int *) malloc (valnum * sizeof(int));

We can break that in parts for a better understanding. At first we do valnum * sizeof(int), sizeof(int) will return the size of an int in bytes in our system ex: 4, then it will be multiplied by valnum, the user input (how many values do you want to add), so if the user wants 4 values it will do something like 4*4 = 16 bytes, the space needed for allocating 4 ints, you got it, right? Then, that will be passed to malloc. This function is used for allocating space in memory. void *malloc(size_t size) allocates the requested memory on the heap and returns a pointer to it (or NULL if the request fails); the pointer variable itself, data, lives on the stack. That is why the result of malloc is stored in a pointer to int!

Then the program does the following:

scanf("%d", data+i);

Note that normally when using scanf we pass in &var, this time we are just passing data+i, being i an integer (0, 1, 2…). That is because data is a pointer itself, a memory address, so using & won’t make sense here. +i is pointer arithmetic: as data is a pointer to the base addr of that dynamic array, data+i is the address of its i-th int (i * sizeof(int) bytes past the base).

Then the same logic follows when doing the sum

sum += *(data+i);

Let’s now disasm this program and analyze it:

[0x558471c62175]> pdf
            ; DATA XREF from entry0 @ 0x558471c620ad
┌ 322: int main (int argc, char **argv, char **envp);
│           ; var int64_t var_20h @ rbp-0x20
│           ; var int64_t var_1ch @ rbp-0x1c
│           ; var int64_t var_18h @ rbp-0x18
│           ; var int64_t var_10h @ rbp-0x10
│           ; var int64_t var_8h @ rbp-0x8
│           0x558471c62175      55             push rbp
│           0x558471c62176      4889e5         mov rbp, rsp
│           0x558471c62179      4883ec20       sub rsp, 0x20
│           0x558471c6217d      64488b042528.  mov rax, qword fs:[0x28]
│           0x558471c62186      488945f8       mov qword [var_8h], rax
│           0x558471c6218a      31c0           xor eax, eax
│           0x558471c6218c      48c745e80000.  mov qword [var_18h], 0
│       ┌─> 0x558471c62194      488d3d6d0e00.  lea rdi, str.How_many_vals_you_need_to_add ; 0x558471c63008 ; "How many vals you need to add? "
│       ╎   0x558471c6219b      b800000000     mov eax, 0
│       ╎   0x558471c621a0      e8abfeffff     call sym.imp.printf     ; int printf(const char *format)
│       ╎   0x558471c621a5      488d45e0       lea rax, [var_20h]
│       ╎   0x558471c621a9      4889c6         mov rsi, rax
│       ╎   0x558471c621ac      488d3d750e00.  lea rdi, [0x558471c63028] ; "%d"
│       ╎   0x558471c621b3      b800000000     mov eax, 0
│       ╎   0x558471c621b8      e8b3feffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)
│       ╎   0x558471c621bd      8b45e0         mov eax, dword [var_20h]
│       ╎   0x558471c621c0      4898           cdqe
│       ╎   0x558471c621c2      48c1e002       shl rax, 2
│       ╎   0x558471c621c6      4889c7         mov rdi, rax
│       ╎   0x558471c621c9      e892feffff     call sym.imp.malloc     ;  void *malloc(size_t size)
│       ╎   0x558471c621ce      488945f0       mov qword [var_10h], rax
│       ╎   0x558471c621d2      48837df000     cmp qword [var_10h], 0
│      ┌──< 0x558471c621d7      7511           jne 0x558471c621ea
│      │╎   0x558471c621d9      488d3d4b0e00.  lea rdi, str.NO_SPACE_AVAILABLE. ; 0x558471c6302b ; "NO SPACE AVAILABLE."
│      │╎   0x558471c621e0      b800000000     mov eax, 0
│      │╎   0x558471c621e5      e866feffff     call sym.imp.printf     ; int printf(const char *format)
│      └──> 0x558471c621ea      48837df000     cmp qword [var_10h], 0
│       └─< 0x558471c621ef      74a3           je 0x558471c62194
│           0x558471c621f1      c745e4000000.  mov dword [var_1ch], 0
│       ┌─< 0x558471c621f8      eb45           jmp 0x558471c6223f
│      ┌──> 0x558471c621fa      8b45e4         mov eax, dword [var_1ch]
│      ╎│   0x558471c621fd      83c001         add eax, 1
│      ╎│   0x558471c62200      89c6           mov esi, eax
│      ╎│   0x558471c62202      488d3d360e00.  lea rdi, str.ENTER_NUM__d ; 0x558471c6303f ; "ENTER NUM %d "
│      ╎│   0x558471c62209      b800000000     mov eax, 0
│      ╎│   0x558471c6220e      e83dfeffff     call sym.imp.printf     ; int printf(const char *format)
│      ╎│   0x558471c62213      8b45e4         mov eax, dword [var_1ch]
│      ╎│   0x558471c62216      4898           cdqe
│      ╎│   0x558471c62218      488d14850000.  lea rdx, [rax*4]
│      ╎│   0x558471c62220      488b45f0       mov rax, qword [var_10h]
│      ╎│   0x558471c62224      4801d0         add rax, rdx
│      ╎│   0x558471c62227      4889c6         mov rsi, rax
│      ╎│   0x558471c6222a      488d3df70d00.  lea rdi, [0x558471c63028] ; "%d"
│      ╎│   0x558471c62231      b800000000     mov eax, 0
│      ╎│   0x558471c62236      e835feffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)
│      ╎│   0x558471c6223b      8345e401       add dword [var_1ch], 1
│      ╎│   ; CODE XREF from main @ 0x558471c621f8
│      ╎└─> 0x558471c6223f      8b45e0         mov eax, dword [var_20h]
│      ╎    0x558471c62242      3945e4         cmp dword [var_1ch], eax
│      └──< 0x558471c62245      7cb3           jl 0x558471c621fa
│           0x558471c62247      c745e4000000.  mov dword [var_1ch], 0
│       ┌─< 0x558471c6224e      eb20           jmp 0x558471c62270
│      ┌──> 0x558471c62250      8b45e4         mov eax, dword [var_1ch]
│      ╎│   0x558471c62253      4898           cdqe
│      ╎│   0x558471c62255      488d14850000.  lea rdx, [rax*4]
│      ╎│   0x558471c6225d      488b45f0       mov rax, qword [var_10h]
│      ╎│   0x558471c62261      4801d0         add rax, rdx
│      ╎│   0x558471c62264      8b00           mov eax, dword [rax]
│      ╎│   0x558471c62266      4898           cdqe
│      ╎│   0x558471c62268      480145e8       add qword [var_18h], rax
│      ╎│   0x558471c6226c      8345e401       add dword [var_1ch], 1
│      ╎│   ; CODE XREF from main @ 0x558471c6224e
│      ╎└─> 0x558471c62270      8b45e0         mov eax, dword [var_20h]
│      ╎    0x558471c62273      3945e4         cmp dword [var_1ch], eax
│      └──< 0x558471c62276      7cd8           jl 0x558471c62250
│           0x558471c62278      488b45e8       mov rax, qword [var_18h]
│           0x558471c6227c      4889c6         mov rsi, rax
│           0x558471c6227f      488d3dc70d00.  lea rdi, str.SUM:__ld   ; 0x558471c6304d ; "SUM: %ld\n"
│           0x558471c62286      b800000000     mov eax, 0
│           0x558471c6228b      e8c0fdffff     call sym.imp.printf     ; int printf(const char *format)
│           0x558471c62290      488b45f0       mov rax, qword [var_10h]
│           0x558471c62294      4889c7         mov rdi, rax
│           0x558471c62297      e894fdffff     call sym.imp.free       ; void free(void *ptr)
│           0x558471c6229c      b800000000     mov eax, 0
│           0x558471c622a1      488b4df8       mov rcx, qword [var_8h]
│           0x558471c622a5      6448330c2528.  xor rcx, qword fs:[0x28]
│       ┌─< 0x558471c622ae      7405           je 0x558471c622b5
│       │   0x558471c622b0      e88bfdffff     call sym.imp.__stack_chk_fail ; void __stack_chk_fail(void)
│       └─> 0x558471c622b5      c9             leave
└           0x558471c622b6      c3             ret
[0x558471c62175]> 

The malloc is first done here:

│       ╎   0x558471c621b8      e8b3feffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)
│       ╎   0x558471c621bd      8b45e0         mov eax, dword [var_20h]
│       ╎   0x558471c621c0      4898           cdqe
│       ╎   0x558471c621c2      48c1e002       shl rax, 2
│       ╎   0x558471c621c6      4889c7         mov rdi, rax
│       ╎   0x558471c621c9      e892feffff     call sym.imp.malloc     ;  void *malloc(size_t size)

So, we load the user input (var_20h) in eax, and we multiply it with 4 with shl rax, 2. As we see, sizeof is not called here, that is because the compiler already knows the size of an int. So the user input * 4 is passed to malloc. Let’s actually run that.

│       ╎   0x558471c621ce b    488945f0       mov qword [var_10h], rax
│       ╎   0x558471c621d2      48837df000     cmp qword [var_10h], 0
│      ┌──< 0x558471c621d7      7511           jne 0x558471c621ea
│      │╎   0x558471c621d9      488d3d4b0e00.  lea rdi, str.NO_SPACE_AVAILABLE. ; 0x558471c6302b ; "NO SPACE AVAILABLE."
│      │╎   0x558471c621e0      b800000000     mov eax, 0
│      │╎   0x558471c621e5      e866feffff     call sym.imp.printf     ; int printf(const char *format)


[0x558471c621ce]> dr rax
0x5584735cfa80

[0x558471c621ce]> dmhg

Heap Layout
┌────────────────────────────────────┐
│    Malloc chunk @ 0x5584735cf250   │
│ size: 0x410 status: allocated      │
└────────────────────────────────────┘
    v
    │
    │
┌────────────────────────────────────┐
│    Malloc chunk @ 0x5584735cf660   │
│ size: 0x410 status: allocated      │
└────────────────────────────────────┘
    v
    │
    │
┌────────────────────────────────────┐
│    Malloc chunk @ 0x5584735cfa70   │
│ size: 0x20 status: allocated       │
└────────────────────────────────────┘
    v
    │
    └──┐
       │
   ┌───────────────────────────────┐
   │  Top chunk @ 0x5584735cfa90   │
   └───────────────────────────────┘

[0x558471c621ce]> 

Nice, the function returned a pointer there and some heap space has been created! Then the pointer is saved to var_10h

│       ╎   0x558471c621ce b    488945f0       mov qword [var_10h], rax
│       ╎   0x558471c621d2      48837df000     cmp qword [var_10h], 0

Let’s now watch how the memory manipulation for the user input is done:

│      ╎│   0x558471c62213      8b45e4         mov eax, dword [var_1ch]
│      ╎│   0x558471c62216      4898           cdqe
│      ╎│   0x558471c62218      488d14850000.  lea rdx, [rax*4]
│      ╎│   0x558471c62220      488b45f0       mov rax, qword [var_10h]
│      ╎│   0x558471c62224      4801d0         add rax, rdx
│      ╎│   0x558471c62227      4889c6         mov rsi, rax
│      ╎│   0x558471c6222a      488d3df70d00.  lea rdi, [0x558471c63028] ; "%d"
│      ╎│   0x558471c62231      b800000000     mov eax, 0
│      ╎│   0x558471c62236      e835feffff     call sym.imp.__isoc99_scanf ; int scanf(const char *format)

Assuming that var_1ch is the loop counter, we see how it gets moved to eax and then the VALUE rax*4 (lea only computes it, no memory is read) is loaded into rdx and added to the base addr of the dyn array. Being 4 the size of the int it makes all the sense: the counter will go 4 by 4 up the dynamic array and assign the new values right there! Then the resulting address will be passed to scanf

The next block of code calculates the sum. Based on what we know, this should not be a mystery:

│      ┌──> 0x558471c62250      8b45e4         mov eax, dword [var_1ch]
│      ╎│   0x558471c62253      4898           cdqe
│      ╎│   0x558471c62255      488d14850000.  lea rdx, [rax*4]
│      ╎│   0x558471c6225d      488b45f0       mov rax, qword [var_10h]
│      ╎│   0x558471c62261      4801d0         add rax, rdx
│      ╎│   0x558471c62264      8b00           mov eax, dword [rax]
│      ╎│   0x558471c62266      4898           cdqe
│      ╎│   0x558471c62268      480145e8       add qword [var_18h], rax
│      ╎│   0x558471c6226c      8345e401       add dword [var_1ch], 1

Same thing is done but this time var_18h is used for storing the sum.

At the end of the program free is called. I like this answer of stack overflow: https://stackoverflow.com/questions/14986543/calling-free-in-c

In general terms calling free will tell the allocator that this area of memory is no longer needed and can now be reused for new allocations. If a developer does not use free after malloc, and then uses malloc multiple times again, the program memory will grow and grow (a memory leak) and that may cause a lot of performance issues and even security problems.

│           0x558471c62290      488b45f0       mov rax, qword [var_10h]
│           0x558471c62294      4889c7         mov rdi, rax
│           0x558471c62297      e894fdffff     call sym.imp.free       ; void free(void *ptr)

Just note that we pass the base addr of our dyn array to free.

Reverse engineering 32 and 64 bits binaries with Radare2 - 10 (pointers and dynamic memory)
Older post

Reverse engineering 32 and 64 bits binaries with Radare2 - 9 (files; read, write, seek and some heaps :O)

Newer post

Reverse engineering 32 and 64 bits binaries with Radare2 - 11 (more pointers and dynamic structs)

Reverse engineering 32 and 64 bits binaries with Radare2 - 10 (pointers and dynamic memory)