Reverse engineering 32 and 64 bits binaries with Radare2 - 9 (files; read, write, seek and some heaps :O)

Reverse engineering 32 and 64 bits binaries with Radare2 - 9 (files; read, write, seek and some heaps :O)

Heeello!

Today I want to talk to you about file operations. File read and write operations are very important, as most programs work with files in some way: for storing projects, for opening files for editing, for storing temporary information, etc. Files are nothing more than information containers and can be represented, for example, as char arrays in memory.

Write to file

Let’s start with this program:

#include <stdio.h>
#include <string.h>
/*
 * Entry point of the write demo: runs func() and then calls getchar().
 * NOTE(review): no return type is written (implicit int) and func() is
 * called before any declaration of it appears in this snippet.
 */
main(){
func();
/* Reads one character from stdin before main returns. */
getchar();
}


/*
 * Writes three strings to "test.txt" through a stdio stream and closes it.
 * NOTE(review): the pointer returned by fopen() is used without a NULL
 * check, and no return type is written for the function.
 */
func()
{ 

   FILE* ftest;
 
    /* Open "test.txt" with the mode string "wt". */
    ftest = fopen("test.txt", "wt");
    /* The three strings are written back to back; only the first and the
       third one end with '\n'. */
    fputs("This is a line\n", ftest);
    fputs("Another line", ftest);
    fputs(" that follows the second line\n", ftest);
    fclose(ftest);

}

To warm this thing up a little bit, let’s inspect the program information:

[0x7f9d4a962090]> iI
arch     x86
baddr    0x55ea4645b000
binsz    6605
bintype  elf
bits     64
canary   false
class    ELF64
compiler GCC: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
crypto   false
endian   little
havecode true
intrp    /lib64/ld-linux-x86-64.so.2
laddr    0x0
lang     c
linenum  true
lsyms    true
machine  AMD x86-64 architecture
maxopsz  16
minopsz  1
nx       true
os       linux
pcalign  0
pic      true
relocs   true
relro    full
rpath    NONE
sanitiz  false
static   false
stripped false
subsys   linux
va       true
[0x7f9d4a962090]> 

Then the strings

[0x7f9d4a962090]> iz
[Strings]
nth paddr      vaddr          len size section type  string
―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
0   0x0000084b 0x55ea4645b84b 8   9    .rodata ascii test.txt
1   0x00000854 0x55ea4645b854 15  16   .rodata ascii This is a line\n
2   0x00000864 0x55ea4645b864 12  13   .rodata ascii Another line
3   0x00000878 0x55ea4645b878 30  31   .rodata ascii  that follows the second line\n

[0x7f9d4a962090]> 

And the sections:

[0x7f9d4a962090]> iS
[Sections]

nth paddr        size vaddr           vsize perm name
―――――――――――――――――――――――――――――――――――――――――――――――――――――
0   0x00000000    0x0 0x00000000        0x0 ---- 
1   0x00000238   0x1c 0x55ea4645b238   0x1c -r-- .interp
2   0x00000254   0x20 0x55ea4645b254   0x20 -r-- .note.ABI_tag
3   0x00000274   0x24 0x55ea4645b274   0x24 -r-- .note.gnu.build_id
4   0x00000298   0x1c 0x55ea4645b298   0x1c -r-- .gnu.hash
5   0x000002b8   0xf0 0x55ea4645b2b8   0xf0 -r-- .dynsym
6   0x000003a8   0x99 0x55ea4645b3a8   0x99 -r-- .dynstr
7   0x00000442   0x14 0x55ea4645b442   0x14 -r-- .gnu.version
8   0x00000458   0x20 0x55ea4645b458   0x20 -r-- .gnu.version_r
9   0x00000478   0xc0 0x55ea4645b478   0xc0 -r-- .rela.dyn
10  0x00000538   0x60 0x55ea4645b538   0x60 -r-- .rela.plt
11  0x00000598   0x17 0x55ea4645b598   0x17 -r-x .init
12  0x000005b0   0x50 0x55ea4645b5b0   0x50 -r-x .plt
13  0x00000600    0x8 0x55ea4645b600    0x8 -r-x .plt.got
14  0x00000610  0x222 0x55ea4645b610  0x222 -r-x .text
15  0x00000834    0x9 0x55ea4645b834    0x9 -r-x .fini
16  0x00000840   0x57 0x55ea4645b840   0x57 -r-- .rodata
17  0x00000898   0x44 0x55ea4645b898   0x44 -r-- .eh_frame_hdr
18  0x000008e0  0x128 0x55ea4645b8e0  0x128 -r-- .eh_frame
19  0x00000da0    0x8 0x55ea4665bda0    0x8 -rw- .init_array
20  0x00000da8    0x8 0x55ea4665bda8    0x8 -rw- .fini_array
21  0x00000db0  0x1f0 0x55ea4665bdb0  0x1f0 -rw- .dynamic
22  0x00000fa0   0x60 0x55ea4665bfa0   0x60 -rw- .got
23  0x00001000   0x10 0x55ea4665c000   0x10 -rw- .data
24  0x00001010    0x0 0x55ea4665c010    0x8 -rw- .bss
25  0x00001010   0x29 0x00000000       0x29 ---- .comment
26  0x00001040  0x648 0x00000000      0x648 ---- .symtab
27  0x00001688  0x247 0x00000000      0x247 ---- .strtab
28  0x000018cf   0xfe 0x00000000       0xfe ---- .shstrtab
[0x7f9d4a962090]> 

Great, now we can disasm the principal function:

[0x55ea4645b734]> pdf
            ; CALL XREF from main @ 0x55ea4645b723
┌ 133: sym.func ();
│           ; var int64_t var_8h @ rbp-0x8
│           0x55ea4645b734      55             push rbp
│           0x55ea4645b735      4889e5         mov rbp, rsp
│           0x55ea4645b738      4883ec10       sub rsp, 0x10
│           0x55ea4645b73c      488d35050100.  lea rsi, [0x55ea4645b848] ; "wt"
│           0x55ea4645b743      488d3d010100.  lea rdi, str.test.txt   ; 0x55ea4645b84b ; "test.txt"
│           0x55ea4645b74a      e891feffff     call sym.imp.fopen      ; file*fopen(const char *filename, const char *mode)
│           0x55ea4645b74f      488945f8       mov qword [var_8h], rax
│           0x55ea4645b753      488b45f8       mov rax, qword [var_8h]
│           0x55ea4645b757      4889c1         mov rcx, rax
│           0x55ea4645b75a      ba0f000000     mov edx, 0xf            ; 15
│           0x55ea4645b75f      be01000000     mov esi, 1
│           0x55ea4645b764      488d3de90000.  lea rdi, str.This_is_a_line ; 0x55ea4645b854 ; "This is a line\n"
│           0x55ea4645b76b      e880feffff     call sym.imp.fwrite     ; size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream)
│           0x55ea4645b770      488b45f8       mov rax, qword [var_8h]
│           0x55ea4645b774      4889c1         mov rcx, rax
│           0x55ea4645b777      ba0c000000     mov edx, 0xc            ; 12
│           0x55ea4645b77c      be01000000     mov esi, 1
│           0x55ea4645b781      488d3ddc0000.  lea rdi, str.Another_line ; 0x55ea4645b864 ; "Another line"
│           0x55ea4645b788      e863feffff     call sym.imp.fwrite     ; size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream)
│           0x55ea4645b78d      488b45f8       mov rax, qword [var_8h]
│           0x55ea4645b791      4889c1         mov rcx, rax
│           0x55ea4645b794      ba1e000000     mov edx, 0x1e           ; 30
│           0x55ea4645b799      be01000000     mov esi, 1
│           0x55ea4645b79e      488d3dd30000.  lea rdi, str.that_follows_the_second_line ; 0x55ea4645b878 ; " that follows the second line\n"
│           0x55ea4645b7a5      e846feffff     call sym.imp.fwrite     ; size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream)
│           0x55ea4645b7aa      488b45f8       mov rax, qword [var_8h]
│           0x55ea4645b7ae      4889c7         mov rdi, rax
│           0x55ea4645b7b1      e80afeffff     call sym.imp.fclose     ; int fclose(FILE *stream)
│           0x55ea4645b7b6      90             nop
│           0x55ea4645b7b7      c9             leave
└           0x55ea4645b7b8      c3             ret
[0x55ea4645b734]> 

And after seeing the disasm, we can quickly see how simple this looks. It basically calls fopen passing “test.txt” and “wt” (open a text file, test.txt in this case, for writing; if the file already exists, its contents are destroyed). fopen returns a pointer to a FILE struct through rax, which is stored inside var_8h; then that pointer, along with the element size 1, the length 0xf and the string “This is a line\n”, is passed to fwrite. Note that the source calls fputs, but since the length of each string literal is known at compile time, the compiler replaced each fputs with an equivalent fwrite call. After that, the same operation repeats another two times with a different string each time. Finally, the pointer to the file is passed to fclose and the function ends. By now you should know perfectly well how parameters are passed to functions in the x64 world.

In C, we start working with files by opening them, using functions such as fopen. In brief, fopen() receives a file path and an access mode (read, write) and creates a FILE structure in the user heap containing a file descriptor (fs) and a buffer (fbuf) of BLKSIZE size. As I assume that you already have some fundamental knowledge of operating systems, you should know that a file descriptor in this case identifies the open file at the OS level (https://en.wikipedia.org/wiki/File_descriptor). It is important to remark, though, that what fopen returns is not a file descriptor: fopen returns a pointer to a FILE struct. A FILE struct contains relevant information related to the file; to make things easy for us humans when working with files, we can pass that FILE struct to many functions (https://www.studytonight.com/c/file-input-output.php). The thing is that, even though some fields like the file descriptor or fields related to buffers or size will always be present, the FILE struct, or the way C deals with it, may vary with the OS. Windows deals with that using calls to the Win API, Linux uses syscalls to the kernel, some versions of gcc have slightly different details, etc., so I won’t go into a lot of detail about that struct, at least for now; I will try to teach you how to deal with it without having any previous knowledge of the struct format.

In general terms, what we must know here is that fopen returns a pointer to a place that contains relevant information related to the file (perhaps the content?) and that this pointer is passed to the functions that work with the file (for writing, reading, etc).

As we know, functions such as fopen make use of the heap, we’ll inspect the heap with dmhg

We can place a breakpoint after the call and inspect the heap, it will look like:

│           0x55fe49a82743      488d3d010100.  lea rdi, str.test.txt   ; 0x55fe49a8284b ; "test.txt"
│           ;-- rip:
│           0x55fe49a8274a b    e891feffff     call sym.imp.fopen      ; file*fopen(const char *filename, const char *mode)
│           0x55fe49a8274f b    488945f8       mov qword [var_8h], rax
│           0x55fe49a82753      488b45f8       mov rax, qword [var_8h]


[0x55fe49a8274a]> dmhg
No Heap section

But right after executing the fopen:

[0x55fe49a8274f]> dr
rax = 0x55fe4ada2260
rbx = 0x00000000
rcx = 0x00000063
rdx = 0x55fe49a8284a
r8 = 0x0000002c
r9 = 0x00000000
[...]
[0x55fe49a8274f]> dmhg
Heap Layout
┌────────────────────────────────────┐
│    Malloc chunk @ 0x55fe4ada2250   │
│ size: 0x230 status: allocated      │
└────────────────────────────────────┘
    v
    │
    └──┐
       │
   ┌───────────────────────────────┐
   │  Top chunk @ 0x55fe4ada2480   │
   └───────────────────────────────┘

[0x55fe49a8274f]> 

If we follow the execution right to the next hot spot, after the first call to fwrite, we’ll see how more content gets added to the heap and that structure gets updated:

[0x55fe49a82770]> dmhg
Heap Layout
┌────────────────────────────────────┐
│    Malloc chunk @ 0x55fe4ada2250   │
│ size: 0x230 status: allocated      │
└────────────────────────────────────┘
    v
    │
    │
┌────────────────────────────────────┐
│    Malloc chunk @ 0x55fe4ada2480   │
│ size: 0x1010 status: allocated     │
└────────────────────────────────────┘
    v
    │
    └──┐
       │
   ┌───────────────────────────────┐
   │  Top chunk @ 0x55fe4ada3490   │
   └───────────────────────────────┘

[0x55fe49a82770]> pxw @ 0x55fe4ada2480
0x55fe4ada2480  0x33b4ad60 0x00007f00 0x00001011 0x00000000  `..3............
0x55fe4ada2490  0x73696854 0x20736920 0x696c2061 0x000a656e  This is a line..
0x55fe4ada24a0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada24b0  0x00000000 0x00000000 0x00000000 0x00000000  ................

It looks like we successfully identified a potential buffer for our file. If we follow the execution through all the successive fwrites, we should see the content being written there; the heap space at the end of the execution will look like:

[0x55fe49a827aa]> pxw 900 @ 0x55fe4ada2250
0x55fe4ada2250  0x00000000 0x00000000 0x00000231 0x00000000  ........1.......
0x55fe4ada2260  0xfbad2c84 0x00000000 0x4ada2490 0x000055fe  .,.......$.J.U..
0x55fe4ada2270  0x4ada2490 0x000055fe 0x4ada2490 0x000055fe  .$.J.U...$.J.U..
0x55fe4ada2280  0x4ada2490 0x000055fe 0x4ada24c9 0x000055fe  .$.J.U...$.J.U..
0x55fe4ada2290  0x4ada3490 0x000055fe 0x4ada2490 0x000055fe  .4.J.U...$.J.U..
0x55fe4ada22a0  0x4ada3490 0x000055fe 0x00000000 0x00000000  .4.J.U..........
0x55fe4ada22b0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada22c0  0x00000000 0x00000000 0x33b4f680 0x00007f00  ...........3....
0x55fe4ada22d0  0x00000003 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada22e0  0x00000000 0x00000000 0x4ada2340 0x000055fe  ........@#.J.U..
0x55fe4ada22f0  0xffffffff 0xffffffff 0x00000000 0x00000000  ................
0x55fe4ada2300  0x4ada2350 0x000055fe 0x00000000 0x00000000  P#.J.U..........
0x55fe4ada2310  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2320  0xffffffff 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2330  0x00000000 0x00000000 0x33b4b2a0 0x00007f00  ...........3....
0x55fe4ada2340  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2350  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2360  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2370  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2380  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2390  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada23a0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada23b0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada23c0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada23d0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada23e0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada23f0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2400  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2410  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2420  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2430  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2440  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2450  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2460  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2470  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55fe4ada2480  0x33b4ad60 0x00007f00 0x00001011 0x00000000  `..3............
0x55fe4ada2490  0x73696854 0x20736920 0x696c2061 0x410a656e  This is a line.A
0x55fe4ada24a0  0x68746f6e 0x6c207265 0x20656e69 0x74616874  nother line that
0x55fe4ada24b0  0x6c6f6620 0x73776f6c 0x65687420 0x63657320   follows the sec
0x55fe4ada24c0  0x20646e6f 0x656e696c 0x0000000a 0x00000000  ond line........

Voilà, there is the content that gets written to the test.txt file. Was that useful in this case? It depends a lot on what you are looking for, though… but you can be sure that inspecting the heap after suspicious functions get executed can lead to good findings.

Read from file

The last one was simple, let’s now read from a file.

#include <stdio.h>
#include <string.h>
/*
 * Entry point of the read demo: runs funcion() and then calls getchar().
 * NOTE(review): no return type is written (implicit int) and funcion() is
 * called before any declaration of it appears in this snippet.
 */
main(){
funcion();
/* Reads one character from stdin before main returns. */
getchar();
}


/*
 * Opens "test.txt" for reading and prints its first two lines with puts().
 * If fopen() returns NULL, prints "file not found!" and exits with status 1.
 * NOTE(review): exit() is declared in <stdlib.h>, which the includes shown
 * with this snippet do not pull in; the fgets() results are not checked.
 */
funcion()
{ 

    FILE* ftest;
    /* File name kept in a local 80-byte array initialised from the literal. */
    char name[80] = "test.txt";
    /* Buffer that receives each line; it is reused by both fgets() calls. */
    char line[81];
 
    ftest = fopen(name, "rt");
 
    if (ftest == NULL)
    {
      printf("file not found!\n");
      exit(1);
    }
    /* Each fgets() is passed a size of 80 and stores its result in line,
       so the second call overwrites what the first one read. */
    fgets(line, 80, ftest);
    puts(line);
    fgets(line, 80, ftest);
    puts(line);
    fclose(ftest);


}

This time, the program initializes some space in memory and opens test.txt with fopen. fopen will return NULL if there is some error opening the file, so the program uses cmp and jne here to check whether the file opened correctly or not; if there is some kind of error opening the file, the program calls exit(); and quits.

│           0x55d1449c31c9      48b874657374.  movabs rax, 0x7478742e74736574 ; 'test.txt'
│           0x55d1449c31d3      ba00000000     mov edx, 0
│           0x55d1449c31d8      48898550ffff.  mov qword [var_b0h], rax
│           0x55d1449c31df      48899558ffff.  mov qword [var_a8h], rdx
│           0x55d1449c31e6      48c78560ffff.  mov qword [var_a0h], 0
│           0x55d1449c31f1      48c78568ffff.  mov qword [var_98h], 0
│           0x55d1449c31fc      48c78570ffff.  mov qword [var_90h], 0
│           0x55d1449c3207      48c78578ffff.  mov qword [var_88h], 0
│           0x55d1449c3212      48c745800000.  mov qword [var_80h], 0
│           0x55d1449c321a      48c745880000.  mov qword [var_78h], 0
│           0x55d1449c3222      48c745900000.  mov qword [var_70h], 0
│           0x55d1449c322a      48c745980000.  mov qword [var_68h], 0
│           0x55d1449c3232      488d8550ffff.  lea rax, [var_b0h]
│           0x55d1449c3239      488d35c40d00.  lea rsi, [0x55d1449c4004] ; "rt"
│           0x55d1449c3240      4889c7         mov rdi, rax
│           0x55d1449c3243      e838feffff     call sym.imp.fopen      ; file*fopen(const char *filename, const char *mode)
│           0x55d1449c3248 b    48898548ffff.  mov qword [var_b8h], rax
│           0x55d1449c324f      4883bd48ffff.  cmp qword [var_b8h], 0
│       ┌─< 0x55d1449c3257      7516           jne 0x55d1449c326f

As we see, the check is done here: if the file is not found, the program will print a message and exit with code 1 (exit with error).

│       ┌─< 0x55d1449c3257      7516           jne 0x55d1449c326f
│       │   0x55d1449c3259      488d3da70d00.  lea rdi, str.file_not_found ; 0x55d1449c4007 ; "file not found!"
│       │   0x55d1449c3260      e8cbfdffff     call sym.imp.puts       ; int puts(const char *s)
│       │   0x55d1449c3265      bf01000000     mov edi, 1
│       │   0x55d1449c326a      e821feffff     call sym.imp.exit       ; void exit(int status)
│       └─> 0x55d1449c326f      488b9548ffff.  mov rdx, qword [var_b8h]

Then here we have the file pointer:

│           0x55d1449c3240      4889c7         mov rdi, rax
│           0x55d1449c3243      e838feffff     call sym.imp.fopen      ; file*fopen(const char *filename, const char *mode)
│           ;-- rip:
│           0x55d1449c3248 b    48898548ffff.  mov qword [var_b8h], rax
│           0x55d1449c324f      4883bd48ffff.  cmp qword [var_b8h], 0

In memory:

[0x55d1449c3248]> dr
rax = 0x55d145eef260
rbx = 0x00000000
rcx = 0x00000005
rdx = 0x00000000
r8 = 0x00000000
r9 = 0x55d1449c4006
r10 = 0x00000000
r11 = 0x00000246
r12 = 0x55d1449c30b0
r13 = 0x7ffc18e1dc20
r14 = 0x00000000
r15 = 0x00000000
rsi = 0x00000000
rdi = 0x55d1449c4005
rsp = 0x7ffc18e1da70
rbp = 0x7ffc18e1db30
rip = 0x55d1449c3248
rflags = 0x00000206
orax = 0xffffffffffffffff
[0x55d1449c3248]> pxw @ 0x55d145eef260
0x55d145eef260  0xfbad2488 0x00000000 0x00000000 0x00000000  .$..............
0x55d145eef270  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef280  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef290  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef2a0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef2b0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef2c0  0x00000000 0x00000000 0xf2630680 0x00007f28  ..........c.(...
0x55d145eef2d0  0x00000003 0x00000000 0x00000000 0x00000000  ................
0x55d145eef2e0  0x00000000 0x00000000 0x45eef340 0x000055d1  ........@..E.U..
0x55d145eef2f0  0xffffffff 0xffffffff 0x00000000 0x00000000  ................
0x55d145eef300  0x45eef350 0x000055d1 0x00000000 0x00000000  P..E.U..........
0x55d145eef310  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef320  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x55d145eef330  0x00000000 0x00000000 0xf2631560 0x00007f28  ........`.c.(...
0x55d145eef340  0x00000000 0x00000000 0x00000000 0x00000000  ................

If we inspect the disasm we will see that fgets(line, 80, ftest); uses three parameters: it sends a pointer to a space in memory, a number and then a pointer to a FILE struct. So in this case, fgets will dump at most 79 characters (one less than the size passed, to leave room for the terminating null byte), stopping earlier at the end of the file or right after a newline is read, to memory starting at the address pointed to by the first parameter. We can debug the program and see it like this:

[0x55d1449c3282]> dr
rax = 0x7ffc18e1dad0
rbx = 0x00000000
rcx = 0x00000005
rdx = 0x55d145eef260
r8 = 0x00000000
r9 = 0x55d1449c4006
r10 = 0x00000000
r11 = 0x00000246
r12 = 0x55d1449c30b0
r13 = 0x7ffc18e1dc20
r14 = 0x00000000
r15 = 0x00000000
rsi = 0x00000050
rdi = 0x7ffc18e1dad0
rsp = 0x7ffc18e1da70
rbp = 0x7ffc18e1db30
rip = 0x55d1449c3282
rflags = 0x00000206
orax = 0xffffffffffffffff
# BEFORE fgets() IS CALLED
[0x55d1449c3282]> pxw @ 0x7ffc18e1dad0
0x7ffc18e1dad0  0x00000000 0x00000000 0x00f0b5ff 0x00000000  ................
0x7ffc18e1dae0  0x000000c2 0x00000000 0x18e1db16 0x00007ffc  ................
0x7ffc18e1daf0  0x00000001 0x00000000 0xf2505b55 0x00007f28  ........U[P.(...
0x7ffc18e1db00  0x00000000 0x00000000 0x449c3325 0x000055d1  ........%3.D.U..
0x7ffc18e1db10  0xf2661b20 0x00007f28 0x00000000 0x00000000   .f.(...........
0x7ffc18e1db20  0x449c32e0 0x000055d1 0xb67e8600 0x110eefa4  .2.D.U....~.....
0x7ffc18e1db30  0x18e1db40 0x00007ffc 0x449c31a3 0x000055d1  @........1.D.U..
0x7ffc18e1db40  0x449c32e0 0x000055d1 0xf2471b6b 0x00007f28  .2.D.U..k.G.(...
0x7ffc18e1db50  0x00000000 0x00000000 0x18e1dc28 0x00007ffc  ........(.......
0x7ffc18e1db60  0x00040000 0x00000001 0x449c3195 0x000055d1  .........1.D.U..
0x7ffc18e1db70  0x00000000 0x00000000 0xdb1818b4 0xd55ddb37  ............7.].
0x7ffc18e1db80  0x449c30b0 0x000055d1 0x18e1dc20 0x00007ffc  .0.D.U.. .......
0x7ffc18e1db90  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dba0  0x087818b4 0x810763cc 0x889e18b4 0x80aeb681  ..x..c..........
0x7ffc18e1dbb0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dbc0  0x00000000 0x00000000 0x18e1dc38 0x00007ffc  ........8.......
[0x55d1449c3282]> dc
hit breakpoint at: 55d1449c3287
# AFTER fgets() IS CALLED
[0x55d1449c3287]> pxw @ 0x7ffc18e1dad0
0x7ffc18e1dad0  0x4c4c4548 0x4c45484f 0x45484f4c 0x0a4f4c4c  HELLOHELLOHELLO.
0x7ffc18e1dae0  0x00000000 0x00000000 0x18e1db16 0x00007ffc  ................
0x7ffc18e1daf0  0x00000001 0x00000000 0xf2505b55 0x00007f28  ........U[P.(...
0x7ffc18e1db00  0x00000000 0x00000000 0x449c3325 0x000055d1  ........%3.D.U..
0x7ffc18e1db10  0xf2661b20 0x00007f28 0x00000000 0x00000000   .f.(...........
0x7ffc18e1db20  0x449c32e0 0x000055d1 0xb67e8600 0x110eefa4  .2.D.U....~.....
0x7ffc18e1db30  0x18e1db40 0x00007ffc 0x449c31a3 0x000055d1  @........1.D.U..
0x7ffc18e1db40  0x449c32e0 0x000055d1 0xf2471b6b 0x00007f28  .2.D.U..k.G.(...
0x7ffc18e1db50  0x00000000 0x00000000 0x18e1dc28 0x00007ffc  ........(.......
0x7ffc18e1db60  0x00040000 0x00000001 0x449c3195 0x000055d1  .........1.D.U..
0x7ffc18e1db70  0x00000000 0x00000000 0xdb1818b4 0xd55ddb37  ............7.].
0x7ffc18e1db80  0x449c30b0 0x000055d1 0x18e1dc20 0x00007ffc  .0.D.U.. .......
0x7ffc18e1db90  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dba0  0x087818b4 0x810763cc 0x889e18b4 0x80aeb681  ..x..c..........
0x7ffc18e1dbb0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dbc0  0x00000000 0x00000000 0x18e1dc38 0x00007ffc  ........8.......
[0x55d1449c3287]> 

The second fgets does the same:


│           ;-- rip:
│           0x55d1449c32a6 b    e8b5fdffff     call sym.imp.fgets      ; char *fgets(char *s, int size, FILE *stream)
│           0x55d1449c32ab      488d45a0       lea rax, [var_60h]
│           0x55d1449c32af      4889c7         mov rdi, rax
│           0x55d1449c32b2      e879fdffff     call sym.imp.puts       ; int puts(const char *s)
│           0x55d1449c32b7      488b8548ffff.  mov rax, qword [var_b8h]
│           0x55d1449c32be      4889c7         mov rdi, rax
│           0x55d1449c32c1      e87afdffff     call sym.imp.fclose     ; int fclose(FILE *stream)
│           0x55d1449c32c6      90             nop
│           0x55d1449c32c7      488b4df8       mov rcx, qword [var_8h]
│           0x55d1449c32cb      6448330c2528.  xor rcx, qword fs:[0x28]
│       ┌─< 0x55d1449c32d4      7405           je 0x55d1449c32db
│       │   0x55d1449c32d6      e875fdffff     call sym.imp.__stack_chk_fail ; void __stack_chk_fail(void)
│       └─> 0x55d1449c32db      c9             leave
└           0x55d1449c32dc      c3             ret
[0x55d1449c32a6]> db 0x55d1449c32ab
[0x55d1449c32a6]> pxw @ 0x7ffc18e1dad0
0x7ffc18e1dad0  0x4c4c4548 0x4c45484f 0x45484f4c 0x0a4f4c4c  HELLOHELLOHELLO.
0x7ffc18e1dae0  0x00000000 0x00000000 0x18e1db16 0x00007ffc  ................
0x7ffc18e1daf0  0x00000001 0x00000000 0xf2505b55 0x00007f28  ........U[P.(...
0x7ffc18e1db00  0x00000000 0x00000000 0x449c3325 0x000055d1  ........%3.D.U..
0x7ffc18e1db10  0xf2661b20 0x00007f28 0x00000000 0x00000000   .f.(...........
0x7ffc18e1db20  0x449c32e0 0x000055d1 0xb67e8600 0x110eefa4  .2.D.U....~.....
0x7ffc18e1db30  0x18e1db40 0x00007ffc 0x449c31a3 0x000055d1  @........1.D.U..
0x7ffc18e1db40  0x449c32e0 0x000055d1 0xf2471b6b 0x00007f28  .2.D.U..k.G.(...
0x7ffc18e1db50  0x00000000 0x00000000 0x18e1dc28 0x00007ffc  ........(.......
0x7ffc18e1db60  0x00040000 0x00000001 0x449c3195 0x000055d1  .........1.D.U..
0x7ffc18e1db70  0x00000000 0x00000000 0xdb1818b4 0xd55ddb37  ............7.].
0x7ffc18e1db80  0x449c30b0 0x000055d1 0x18e1dc20 0x00007ffc  .0.D.U.. .......
0x7ffc18e1db90  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dba0  0x087818b4 0x810763cc 0x889e18b4 0x80aeb681  ..x..c..........
0x7ffc18e1dbb0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dbc0  0x00000000 0x00000000 0x18e1dc38 0x00007ffc  ........8.......
[0x55d1449c32a6]> dc
hit breakpoint at: 55d1449c32ab
[0x55d1449c32ab]> pxw @ 0x7ffc18e1dad0
0x7ffc18e1dad0  0x4c524f57 0x524f5744 0x4f57444c 0x0a444c52  WORLDWORLDWORLD.
0x7ffc18e1dae0  0x00000000 0x00000000 0x18e1db16 0x00007ffc  ................
0x7ffc18e1daf0  0x00000001 0x00000000 0xf2505b55 0x00007f28  ........U[P.(...
0x7ffc18e1db00  0x00000000 0x00000000 0x449c3325 0x000055d1  ........%3.D.U..
0x7ffc18e1db10  0xf2661b20 0x00007f28 0x00000000 0x00000000   .f.(...........
0x7ffc18e1db20  0x449c32e0 0x000055d1 0xb67e8600 0x110eefa4  .2.D.U....~.....
0x7ffc18e1db30  0x18e1db40 0x00007ffc 0x449c31a3 0x000055d1  @........1.D.U..
0x7ffc18e1db40  0x449c32e0 0x000055d1 0xf2471b6b 0x00007f28  .2.D.U..k.G.(...
0x7ffc18e1db50  0x00000000 0x00000000 0x18e1dc28 0x00007ffc  ........(.......
0x7ffc18e1db60  0x00040000 0x00000001 0x449c3195 0x000055d1  .........1.D.U..
0x7ffc18e1db70  0x00000000 0x00000000 0xdb1818b4 0xd55ddb37  ............7.].
0x7ffc18e1db80  0x449c30b0 0x000055d1 0x18e1dc20 0x00007ffc  .0.D.U.. .......
0x7ffc18e1db90  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dba0  0x087818b4 0x810763cc 0x889e18b4 0x80aeb681  ..x..c..........
0x7ffc18e1dbb0  0x00000000 0x00000000 0x00000000 0x00000000  ................
0x7ffc18e1dbc0  0x00000000 0x00000000 0x18e1dc38 0x00007ffc  ........8.......
[0x55d1449c32ab]> 

Note that as we are using the same char array (buffer) to read, the second fgets overwrites what was dumped by the first fgets. If we want to keep both lines or do some more advanced read/write operations that may include changes of position, we can declare a bigger array and use fgets, seek and fgets. But wait, what is seek? Let’s see(k) it.

Seek in file and write

You may have noticed that as we call fgets multiple times we go forward in the file content, what if we want to go back? What if we want to directly jump at the end? fseek comes in very handy when dealing with these cases. Let’s inspect the following program:

#include <stdio.h>

/*
 * Writes a sentence to "fseek.txt", moves the file position to offset 7
 * from the start of the file, writes a second string there and closes the
 * stream. Always returns 0.
 * NOTE(review): the results of fopen() and fseek() are not checked.
 */
int main () {
   FILE *fp;

   /* Open "fseek.txt" with the mode string "w+". */
   fp = fopen("fseek.txt","w+");
   fputs("This is a simple file, feel free to visit artik.blue to get fresh reversing stuff", fp);
  
   /* SEEK_SET makes the offset (7) relative to the beginning of the file,
      so the next write starts inside the text written above. */
   fseek( fp, 7, SEEK_SET );
   fputs(" C Programming Language", fp);
   fclose(fp);
   
   return(0);
}

As we can see, it first writes to a file, then seeks the pointer to the 7th position (think about a char array) then writes content there and then closes the file and exits. Inside radare:

[0x7f376be20090]> s main
[0x560d0969d165]> pdf
            ; DATA XREF from entry0 @ 0x560d0969d09d
┌ 130: int main (int argc, char **argv, char **envp);
│           ; var int64_t var_8h @ rbp-0x8
│           0x560d0969d165      55             push rbp
│           0x560d0969d166      4889e5         mov rbp, rsp
│           0x560d0969d169      4883ec10       sub rsp, 0x10
│           0x560d0969d16d      488d35940e00.  lea rsi, [0x560d0969e008] ; "w+"
│           0x560d0969d174      488d3d900e00.  lea rdi, str.fseek.txt  ; 0x560d0969e00b ; "fseek.txt"
│           0x560d0969d17b      e8d0feffff     call sym.imp.fopen      ; file*fopen(const char *filename, const char *mode)
│           0x560d0969d180      488945f8       mov qword [var_8h], rax
│           0x560d0969d184      488b45f8       mov rax, qword [var_8h]
│           0x560d0969d188      4889c1         mov rcx, rax
│           0x560d0969d18b      ba51000000     mov edx, 0x51           ; 'Q' ; 81
│           0x560d0969d190      be01000000     mov esi, 1
│           0x560d0969d195      488d3d7c0e00.  lea rdi, str.This_is_a_simple_file__feel_free_to_visit_artik.blue_to_get_fresh_reversing_stuff ; 0x560d0969e018 ; "This is a simple file, feel free to visit artik.blue to get fresh reversing stuff"
│           0x560d0969d19c      e8bffeffff     call sym.imp.fwrite     ; size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream)
│           0x560d0969d1a1      488b45f8       mov rax, qword [var_8h]
│           0x560d0969d1a5      ba00000000     mov edx, 0
│           0x560d0969d1aa      be07000000     mov esi, 7
│           0x560d0969d1af      4889c7         mov rdi, rax
│           0x560d0969d1b2      e889feffff     call sym.imp.fseek      ; int fseek(FILE *stream, long offset, int whence)
│           0x560d0969d1b7      488b45f8       mov rax, qword [var_8h]
│           0x560d0969d1bb      4889c1         mov rcx, rax
│           0x560d0969d1be      ba17000000     mov edx, 0x17           ; 23
│           0x560d0969d1c3      be01000000     mov esi, 1
│           0x560d0969d1c8      488d3d9b0e00.  lea rdi, str.C_Programming_Language ; 0x560d0969e06a ; " C Programming Language"
│           0x560d0969d1cf      e88cfeffff     call sym.imp.fwrite     ; size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream)
│           0x560d0969d1d4      488b45f8       mov rax, qword [var_8h]
│           0x560d0969d1d8      4889c7         mov rdi, rax
│           0x560d0969d1db      e850feffff     call sym.imp.fclose     ; int fclose(FILE *stream)
│           0x560d0969d1e0      b800000000     mov eax, 0
│           0x560d0969d1e5      c9             leave
└           0x560d0969d1e6      c3             ret
[0x560d0969d165]> 

We see the same as before, except this time a new call comes in (fseek). fseek receives the var that contains the FILE pointer in rdi, the offset 0x7 in esi and 0 (SEEK_SET, i.e. relative to the start of the file) in edx; no surprises.

│           0x560d0969d1a1      488b45f8       mov rax, qword [var_8h]
│           0x560d0969d1a5      ba00000000     mov edx, 0
│           0x560d0969d1aa      be07000000     mov esi, 7
│           0x560d0969d1af      4889c7         mov rdi, rax
│           0x560d0969d1b2      e889feffff     call sym.imp.fseek      ; int fseek(FILE *stream, long offset, int whence)
│           0x560d0969d1b7      488b45f8       mov rax, qword [var_8h]

After fseek is called, its return value is not used; the system will now know that it has to write starting at pos 7. Also note that no string sizes were declared in the C program: the compiler calculates the size and inserts the parameter there for us (This is a simple file, feel free to visit artik.blue to get fresh reversing stuff = 81 characters, 0x51 = dec 81).

│           0x560d0969d1b7      488b45f8       mov rax, qword [var_8h]
│           0x560d0969d1bb      4889c1         mov rcx, rax
│           0x560d0969d1be      ba17000000     mov edx, 0x17           ; 23
│           0x560d0969d1c3      be01000000     mov esi, 1
│           0x560d0969d1c8      488d3d9b0e00.  lea rdi, str.C_Programming_Language ; 0x560d0969e06a ; " C Programming Language"
│           0x560d0969d1cf      e88cfeffff     call sym.imp.fwrite     ; size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream)
│           0x560d0969d1d4      488b45f8       mov rax, qword [var_8h]

So we already know how this works, nothing new here.

Read the whole file line by line

What if we just want to read the full file, line by line, till the end? We keep reading lines using fgets until we reach EOF. Let’s see:

#include <stdio.h>

/*
 * Opens "myinputfile.txt" for reading and prints every line that fgets()
 * returns, prefixed with a counter that starts at 0 ("<n>: <line>").
 * If the file cannot be opened, a message is printed and the program
 * exits with status 1.
 *
 * NOTE(review): exit() is declared in <stdlib.h>, which is not among the
 * includes shown with this listing; fp is also never passed to fclose().
 */
void main(){
char buffer[500];       // receives the text read by each fgets() call
FILE *fp;
int lineno = 0;         // counter printed in front of each line
// fopen() returns NULL when the file could not be opened
if ((fp = fopen("myinputfile.txt","r")) == NULL)
{
        printf("Could not open myinputfile.txt\n");
        exit(1);
}

// loop until the end-of-file indicator is set on fp
while ( !feof(fp))
{
        // read in the line and make sure it was successful
        if (fgets(buffer,500,fp) != NULL)
        {
                // print the current counter value, then increment it
                printf("%d: %s",lineno++,buffer);
        }
}
}

This function should look familiar to us:

[0x55651c3ee195]> pdf
            ; DATA XREF from entry0 @ 0x55651c3ee0cd
┌ 209: int main (int argc, char **argv, char **envp);
│           ; var int64_t var_20ch @ rbp-0x20c
│           ; var int64_t var_208h @ rbp-0x208
│           ; var int64_t var_200h @ rbp-0x200
│           ; var int64_t var_8h @ rbp-0x8
│           0x55651c3ee195      55             push rbp
│           0x55651c3ee196      4889e5         mov rbp, rsp
│           0x55651c3ee199      4881ec100200.  sub rsp, 0x210
│           0x55651c3ee1a0      64488b042528.  mov rax, qword fs:[0x28]
│           0x55651c3ee1a9      488945f8       mov qword [var_8h], rax
│           0x55651c3ee1ad      31c0           xor eax, eax
│           0x55651c3ee1af      c785f4fdffff.  mov dword [var_20ch], 0
│           0x55651c3ee1b9      488d35480e00.  lea rsi, [0x55651c3ef008] ; "r"
│           0x55651c3ee1c0      488d3d430e00.  lea rdi, str.myinputfile.txt ; 0x55651c3ef00a ; "myinputfile.txt"
│           0x55651c3ee1c7      e8b4feffff     call sym.imp.fopen      ; file*fopen(const char *filename, const char *mode)
│           0x55651c3ee1cc      488985f8fdff.  mov qword [var_208h], rax
│           0x55651c3ee1d3      4883bdf8fdff.  cmp qword [var_208h], 0
│       ┌─< 0x55651c3ee1db      755f           jne 0x55651c3ee23c
│       │   0x55651c3ee1dd      488d3d3c0e00.  lea rdi, str.Could_not_open_myinputfile.txt ; 0x55651c3ef020 ; "Could not open myinputfile.txt"
│       │   0x55651c3ee1e4      e847feffff     call sym.imp.puts       ; int puts(const char *s)
│       │   0x55651c3ee1e9      bf01000000     mov edi, 1
│       │   0x55651c3ee1ee      e89dfeffff     call sym.imp.exit       ; void exit(int status)
│      ┌──> 0x55651c3ee1f3      488b95f8fdff.  mov rdx, qword [var_208h]
│      ╎│   0x55651c3ee1fa      488d8500feff.  lea rax, [var_200h]
│      ╎│   0x55651c3ee201      bef4010000     mov esi, 0x1f4          ; 500
│      ╎│   0x55651c3ee206      4889c7         mov rdi, rax
│      ╎│   0x55651c3ee209      e852feffff     call sym.imp.fgets      ; char *fgets(char *s, int size, FILE *stream)
│      ╎│   0x55651c3ee20e      4885c0         test rax, rax
│     ┌───< 0x55651c3ee211      7429           je 0x55651c3ee23c
│     │╎│   0x55651c3ee213      8b85f4fdffff   mov eax, dword [var_20ch]
│     │╎│   0x55651c3ee219      8d5001         lea edx, [rax + 1]
│     │╎│   0x55651c3ee21c      8995f4fdffff   mov dword [var_20ch], edx
│     │╎│   0x55651c3ee222      488d9500feff.  lea rdx, [var_200h]
│     │╎│   0x55651c3ee229      89c6           mov esi, eax
│     │╎│   0x55651c3ee22b      488d3d0d0e00.  lea rdi, str.d:__s      ; 0x55651c3ef03f ; "%d: %s"
│     │╎│   0x55651c3ee232      b800000000     mov eax, 0
│     │╎│   0x55651c3ee237      e814feffff     call sym.imp.printf     ; int printf(const char *format)
│     └─└─> 0x55651c3ee23c      488b85f8fdff.  mov rax, qword [var_208h]
│      ╎    0x55651c3ee243      4889c7         mov rdi, rax
│      ╎    0x55651c3ee246      e825feffff     call sym.imp.feof       ; int feof(FILE *stream)
│      ╎    0x55651c3ee24b      85c0           test eax, eax
│      └──< 0x55651c3ee24d      74a4           je 0x55651c3ee1f3
│           0x55651c3ee24f      90             nop
│           0x55651c3ee250      488b45f8       mov rax, qword [var_8h]
│           0x55651c3ee254      644833042528.  xor rax, qword fs:[0x28]
│       ┌─< 0x55651c3ee25d      7405           je 0x55651c3ee264
│       │   0x55651c3ee25f      e8dcfdffff     call sym.imp.__stack_chk_fail ; void __stack_chk_fail(void)
│       └─> 0x55651c3ee264      c9             leave
└           0x55651c3ee265      c3             ret
[0x55651c3ee195]> 

As this may look a bit scary, we can use the r2dec decompiler! :) as we learned in the previous post:

#include <stdint.h>
 
int32_t main (void) {
    int64_t var_20ch;
    int64_t var_208h;
    int64_t var_200h;
    int64_t var_8h;
    rax = *(fs:0x28);
    var_8h = *(fs:0x28);
    eax = 0;
    var_20ch = 0;
    rax = fopen ("myinputfile.txt", 0x55651c3ef008);
    var_208h = rax;
    if (var_208h != 0) {
        goto label_0;
    }
    puts ("Could not open myinputfile.txt");
    exit (1);
    do {
        rax = &var_200h;
        rax = fgets (rax, 0x1f4, var_208h);
        if (rax != 0) {
            eax = var_20ch;
            edx = rax + 1;
            var_20ch = edx;
            rdx = &var_200h;
            esi = eax;
            eax = 0;
            printf ("%d: %s");
        }
label_0:
        rax = var_208h;
        eax = feof (var_208h);
    } while (eax == 0);
    rax = var_8h;
    rax ^= *(fs:0x28);
    if (eax != 0) {
        stack_chk_fail ();
    }
    return rax;
}

So the key thing happens when comparing eax with 0 after calling feof.

That comparison happens here:

│      ╎    0x55651c3ee246      e825feffff     call sym.imp.feof       ; int feof(FILE *stream)
│      ╎    0x55651c3ee24b      85c0           test eax, eax
│      └──< 0x55651c3ee24d      74a4           je 0x55651c3ee1f3

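In this case, feof receives the FILE pointer and checks the stream's end-of-file indicator, which is set once a previous read (here, fgets) has tried to read past the end of the file. It returns non-zero if the indicator is set and zero otherwise, so while eax is 0 the je jumps back to the top of the loop. Easy!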

As the rest of the program should be familiar to you I can proudly say that this post is over! See you on the next post of the course.

Reverse engineering 32 and 64 bits binaries with Radare2 - 9 (files; read, write, seek and some heaps :O)
Older post

Reverse engineering 32 and 64 bits binaries with Radare2 - 8 (start doing crackmes now!)

Newer post

Reverse engineering 32 and 64 bits binaries with Radare2 - 10 (pointers and dynamic memory)

Reverse engineering 32 and 64 bits binaries with Radare2 - 9 (files; read, write, seek and some heaps :O)