Compare commits

..

No commits in common. "619056a0a7f1549cc88f43e1a9aba9995ef6d155" and "17d731e9b883c4763ec25d659403e46c11c48394" have entirely different histories.

38 changed files with 254 additions and 2897 deletions

133
html/assets/style.css Normal file

@ -0,0 +1,133 @@
/* Global reset: strip default margin and padding from every element. */
* {
margin: 0;
padding: 0;
}
/* Give the root element and the body a 100% height. */
body, html {
height: 100%;
}
/* Visually hidden text: positioned far off-screen and clipped to a 1px box,
   but still present in the document (index.html uses it to label icon links). */
.sr-only {
position:absolute;
left:-10000px;
top:auto;
width:1px;
height:1px;
overflow:hidden;
}
/* Text-selection colours. Note that the Firefox-prefixed rule uses a
   different palette than the standard ::selection rule below. */
::-moz-selection {
color: #000b13;
background: #c42337;
}
::selection {
color: #040404;
background: #d7d9ce;
}
/* Page shell: near-black background, monospace text, children stacked and
   centred in a flex column, capped at 1500px wide and centred horizontally. */
body {
background-color: #040404;
color: #119da4;
font-family: monospace;
font-size: 1.0em;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
max-width: 1500px;
margin: auto;
}
/* Landing-page header: centred text, with larger headings. */
header {
text-align: center;
font-size: 1.2em;
}
header h1 {
font-size: 4em;
}
header h2 {
font-size: 2em;
}
/* Header picture, fully rounded corners at a fixed 220px width. */
header img {
border-radius: 100%;
width: 220px;
margin: 10px;
}
/* Rows of icon links in the header: unstyled list laid out inline. */
header .socials {
margin-top: 5px;
list-style-type: none;
font-size: 1.4em;
}
header .socials li {
display: inline-block;
}
/* Icon links take the surrounding text colour and lighten on hover. */
header .socials a {
color: inherit;
text-decoration: none;
}
header .socials a:hover {
color: #d7d9ce;
}
/* .articles: a list block with highlighted h3 titles and muted links. */
.articles {
margin-top: 50px;
font-size: 1.3em;
}
.articles h3 {
font-size: 1em;
font-weight: 800;
color: #f4b65c;
}
.articles ul {
list-style-type: none;
}
.articles li {
margin-top: 0.4em;
margin-bottom: 0.4em;
}
.articles a {
color: #7f8589;
text-decoration: none;
}
.articles a:hover {
color: #c42337;
}
/* <article> pages (the blog pages wrap their content in <article>):
   highlighted title, justified paragraphs, undecorated links. */
article h1 {
color: #f4b65c;
margin-bottom: 0.7em;
}
article {
font-size: 1.3em;
}
article p {
padding-top: 3px;
padding-bottom: 3px;
text-align: justify;
}
article a {
color: #119da4;
text-decoration: none;
}
article a:hover {
color: #d7d9ce;
}

17
html/blog/article.html Normal file

@ -0,0 +1,17 @@
<!DOCTYPE html>
<!--
Placeholder article page: it only contains a dummy title and paragraph.
It loads the shared stylesheet from ../assets/ and the Remix Icon font from a CDN.
-->
<html lang="en">
<head>
<title>Juju</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=0.6">
<link rel="stylesheet" href="../assets/style.css">
<link href="https://cdn.jsdelivr.net/npm/remixicon@2.5.0/fonts/remixicon.css" rel="stylesheet">
</head>
<body>
<!-- Content is wrapped in <article> so the "article ..." rules of style.css apply. -->
<article>
<h1>Article</h1>
<p>Test</p>
</article>
</body>
</html>

Binary file not shown.

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <title>Juju</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=0.6">
    <link rel="stylesheet" href="../../assets/style.css">
    <link href="https://cdn.jsdelivr.net/npm/remixicon@2.5.0/fonts/remixicon.css" rel="stylesheet">
</head>
<body>
    <article>
        <!-- The title is in French while the page is declared as English. -->
        <h1 lang="fr">Introduction à la rétro-ingénierie et à l'exploitation logicielle</h1>
        <!--
        Icon-only links: the <i> elements carry no text, so each link gets a
        visually hidden label (same .sr-only pattern as the landing page).
        -->
        <h2>
            <a href="https://www.youtube.com/watch?v=5g2eZSST7YE">
                <i class="ri-youtube-line"></i>
                <span class="sr-only">Video on YouTube</span>
            </a>
            <a href="binary_exploitation.pdf">
                <i class="ri-slideshow-line"></i>
                <span class="sr-only">Slides (PDF)</span>
            </a>
        </h2>
    </article>
</body>
</html>

19
html/blog/index.html Normal file

@ -0,0 +1,19 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <title>Juju</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=0.6">
    <link rel="stylesheet" href="../assets/style.css">
    <link href="https://cdn.jsdelivr.net/npm/remixicon@2.5.0/fonts/remixicon.css" rel="stylesheet">
</head>
<body>
    <article>
        <h1>Posts</h1>
        <!-- List of posts: <li> items must live inside a list element. -->
        <ul>
            <li>
                <h3><a href="conf_exploit/">Introduction à la rétro-ingénierie et à l'exploitation logicielle [FR]</a></h3>
            </li>
        </ul>
    </article>
</body>
</html>

BIN
html/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

60
html/index.html Normal file

@ -0,0 +1,60 @@
<!--
Credits where it's due, https://landryl.fr who allowed me to use his css
-->
<!DOCTYPE html>
<html lang="en">
<head>
    <title>Juju</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=0.6">
    <link rel="stylesheet" href="assets/style.css">
    <link href="https://cdn.jsdelivr.net/npm/remixicon@2.5.0/fonts/remixicon.css" rel="stylesheet">
</head>
<body>
    <header>
        <img src="juju.jpg" alt="Photo of myself">
        <h1>Juju</h1>
        <!--
        External profiles. Every link is an icon only, so each one carries a
        visually hidden .sr-only label describing its destination.
        -->
        <ul class="socials">
            <li>
                <a href="https://github.com/Azomasiel/">
                    <i class="ri-github-line"></i>
                    <span class="sr-only">Github profile</span>
                </a>
            </li>
            <li>
                <a href="https://git.juju.re/explore/repos">
                    <i class="ri-open-source-line"></i>
                    <span class="sr-only">Turbogit</span>
                </a>
            </li>
            <li>
                <a href="https://www.linkedin.com/in/julien-clement-0891ab199/">
                    <i class="ri-linkedin-box-line"></i>
                    <span class="sr-only">Linkedin profile</span>
                </a>
            </li>
            <li>
                <a href="https://www.root-me.org/Azomasiel">
                    <i class="ri-skull-line"></i>
                    <span class="sr-only">Root-me profile</span>
                </a>
            </li>
            <li>
                <a href="https://cryptohack.org/user/Azomasiel/">
                    <i class="ri-shield-keyhole-line"></i>
                    <span class="sr-only">Cryptohack profile</span>
                </a>
            </li>
        </ul>
        <!-- Internal links. The label must describe the blog, not the Github profile. -->
        <ul class="socials">
            <li>
                <a href="blog/">
                    <i class="ri-booklet-line"></i>
                    <span class="sr-only">Blog</span>
                </a>
            </li>
        </ul>
    </header>
</body>
</html>

BIN
html/juju.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

@ -1,325 +0,0 @@
---
title: "Black box reversing | Archiver @ FCSC 2024"
date: "2024-04-13 22:00:00"
author: "Juju"
tags: ["Reverse", "Writeup", "fcsc"]
toc: true
---
# Intro
This writeup is not a really serious one, if you are reading this
as part of the FCSC writeup reviews, my submitted writeups are
actually for `svartalfheim` and `megalosaure`. I still thought
it would be funny to include this one as it is not that long and
I think my solution is kind of unintended.
Basically we are given a `Windows` stripped binary, compiled
from `rust`. The binary is an encrypted archive manager.
This challenge managed to put every single reverser red flag
in a single binary.
I tried opening up the binary in `binary ninja` or `IDA`, but it
was as expected: stripped `rust`.
So let's close this up, never touch it again and see what we can
do without reading the code or debugging.
{{< image src="/archiver/meme.jpg" style="border-radius: 8px;" >}}
## Challenge description
`reverse` | `490 pts` `5 solves` `:star::star:`
```
Notre équipe SIGINT a intercepté un e-mail contenant une pièce jointe
result.fcsc. Vu l'extension, il doit s'agir d'une archive au format
propriétaire FCSC. Nous avons pu récupérer l'utilitaire dans sa version
Windows, archiver.exe.
Vu le contenu de l'e-mail, ça a l'air assez important. Est-ce qu'on a une
chance de savoir ce qu'il y a dedans ?
Votre prédécesseur, après avoir réussi une analyse similaire, a sombré
dans la folie et réside désormais dans un asile psychiatrique. Il
murmurait des mots étranges comme "TTD" et parlait sans arrêt de hardware
breakpoint.
Note : La chaîne que vous trouverez est à mettre entre FCSC{} pour avoir
le flag.
```
Authors: `commial` `YoMama`
## Given files
[archiver.exe](/archiver/archiver.exe)
[result.fcsc](/archiver/result.fcsc)
# Writeup
## Overview
With the binary, we are also given a `result.fcsc`, which is
an archive encrypted with the given binary.
This archive contains the flag and we must decrypt it.
```console
$ xxd result.fcsc
00000000: 0100 0000 0000 0000 21e2 ae0f b85f de7b ........!...._.{
00000010: b246 ed90 194f 601e 041b 3c8a c6e9 37b1 .F...O`...<...7.
00000020: 878b d8e0 e796 a098 1800 0000 0000 0000 ................
00000030: d44a b96f ea76 8c55 73a0 7266 1a5e b5fd .J.o.v.Us.rf.^..
00000040: c3bf cc29 8ed7 e925 1800 0000 0000 0000 ...)...%........
00000050: f96d be51 956d 9342 7e3d 1c0d bbef ad7a .m.Q.m.B~=.....z
00000060: bc57 7bf0 f36a cb23 .W{..j.#
```
## Common fields
### Sizes
First thing we can see are 3 `uint64_t` packed in little endian at offsets:
* `0x0`
* `0x28`
* `0x48`
Given how small these numbers are, they are probably representing
some sizes.
If we count manually, we see that the last two `uint64_t` represent the size of the data that immediately follows them.
The first one is still unknown, but as it is `1`, it is probably just
the number of files in the archive.
### sha256
Now let's try to create our own archive:
```console
$ echo -n 'FCSC{test_flag}' > flag.txt
$ ./archiver.exe create password test.db flag.txt
$ xxd test.db
00000000: 0100 0000 0000 0000 21e2 ae0f b85f de7b ........!...._.{
00000010: b246 ed90 194f 601e 041b 3c8a c6e9 37b1 .F...O`...<...7.
00000020: 878b d8e0 e796 a098 1800 0000 0000 0000 ................
00000030: 8257 3ccc 2608 498d b6b7 5801 740f 2e4e .W<.&.I...X.t..N
00000040: 8b36 0169 9273 2c91 1f00 0000 0000 0000 .6.i.s,.........
00000050: a278 0ee8 7308 548a 84af 1ad4 6c0c 0844 .x..s.T.....l..D
00000060: de13 a830 ea7f 4d37 19ea 7efe 14b5 c5 ...0..M7..~....
```
Two things are already weird:
* Everything is identical to the given archive until offset `0x30`
* This archive is larger than the given one. Either the flag is really small or data is compressed.
Let's try to archive a really low entropy file to see if the archive is smaller:
```console
$ echo -n 'aaaaaaaaaaaaaaa' > aaaaaaaa
$ ./archiver.exe create password low_entropy.db aaaaaaaa
$ xxd low_entropy.db
00000000: 0100 0000 0000 0000 1f3c e404 15a2 081f .........<......
00000010: a3ee e75f c39f ff8e 56c2 2270 d1a9 78a7 ..._....V."p..x.
00000020: 249b 592d cebd 20b4 1800 0000 0000 0000 $.Y-.. .........
00000030: b2b6 ed4e 38bb 52b8 5cd7 12a7 7df6 261b ...N8.R.\...}.&.
00000040: 2bbf f8df 77c4 dfe8 1f00 0000 0000 0000 +...w...........
00000050: b2b6 ed4e 38bb 52b8 8876 7671 b114 9799 ...N8.R..vvq....
00000060: cb38 24e6 02f9 26f9 25ec a7b8 bdb9 56 .8$...&.%.....V
```
I used a file name and file data of exactly the same sizes as before.
The resulting archive is exactly the same size, so no compression
is performed.
But wait! Something changed!
In our first archive, the first `0x30` bytes were identical to the given one,
but now they differ starting at `0x8`: they differ for `0x20` bytes
before becoming the same again at the field we identified as a size.
Well, we changed two things: the filename and the file data.
It is unlikely that we guessed the file data on our first try.
But the filename, however? What if the format stores a hash
of the filename on `0x20` bytes at offset `0x8`?
```console
$ echo -n 'flag.txt' | sha256sum
21e2ae0fb85fde7bb246ed90194f601e041b3c8ac6e937b1878bd8e0e796a098 -
```
Bingo ! It matches the bytes we have in our first custom archive
and the one given.
So we know that the filename is `flag.txt` and that the archive
stores the `sha256` of the filename at offset `0x8`.
### Cipher texts
I will now play with data sizes to identify what the sizes in the archive refer to.
Let's create an archive with a filename 1 byte smaller, and data 1 byte larger:
```console
$ echo -n 'aaaaaaaaaaaaaaaa' > aaaaaaa
$ ./archiver.exe create password sizes.db aaaaaaa
$ xxd sizes.db
00000000: 0100 0000 0000 0000 e462 4071 4b5d b3a2 .........b@qK]..
00000010: 3eee 6047 9a62 3efb a4d6 33d2 7fe4 f03c >.`G.b>...3....<
00000020: 904b 9e21 9a7f be60 1700 0000 0000 0000 .K.!...`........
00000030: 8ab9 acd8 126c cc48 064e 9843 2fa1 9492 .....l.H.N.C/...
00000040: 8503 ce34 399c 9820 0000 0000 0000 008a ...49.. ........
00000050: b9ac d812 6ccc f370 a362 da68 de94 c7d2 ....l..p.b.h....
00000060: aa91 eb29 2be2 0aa3 a74f fd99 4dc0 ca ...)+....O..M..
```
Notice that the first size field went from `0x18` to `0x17` and
the second one from `0x1f` to `0x20`.
Thus the first size and data correspond to the encrypted file name, and the second one to the encrypted data.
We can also see that the encrypted data is always exactly `0x10` bytes
larger than the plaintext. So it probably just adds a `0x10`-byte IV in front of it.
Looking back at the original archive, we can thus see that
the filename is `0x8` bytes long (which matches the `flag.txt`
we found) and the data is also `0x8` bytes long, thus confirming
the really small flag (see below for a reminder of the original
archive).
```console
$ xxd result.fcsc
00000000: 0100 0000 0000 0000 21e2 ae0f b85f de7b ........!...._.{
00000010: b246 ed90 194f 601e 041b 3c8a c6e9 37b1 .F...O`...<...7.
00000020: 878b d8e0 e796 a098 1800 0000 0000 0000 ................
00000030: d44a b96f ea76 8c55 73a0 7266 1a5e b5fd .J.o.v.Us.rf.^..
00000040: c3bf cc29 8ed7 e925 1800 0000 0000 0000 ...)...%........
00000050: f96d be51 956d 9342 7e3d 1c0d bbef ad7a .m.Q.m.B~=.....z
00000060: bc57 7bf0 f36a cb23 .W{..j.#
```
## Figuring out the crypto
Now I will try to get a file as close as possible to the original
flag and see how the resulting archive reacts to small
input mutations:
```console
$ echo -n '12345678' > flag.txt
$ ./archiver.exe create password 12345678.db flag.txt
$ echo -n '22345678' > flag.txt
$ ./archiver.exe create password 22345678.db flag.txt
$ xxd 12345678.db
00000000: 0100 0000 0000 0000 21e2 ae0f b85f de7b ........!...._.{
00000010: b246 ed90 194f 601e 041b 3c8a c6e9 37b1 .F...O`...<...7.
00000020: 878b d8e0 e796 a098 1800 0000 0000 0000 ................
00000030: 8257 3ccc 2608 498d b6b7 5801 740f 2e4e .W<.&.I...X.t..N
00000040: 8b36 0169 9273 2c91 1800 0000 0000 0000 .6.i.s,.........
00000050: d509 6e9f 3d4a 06c1 9daa bebe f6cb c23b ..n.=J.........;
00000060: cf4e 3d32 2b68 09cf .N=2+h..
$ xxd 22345678.db
00000000: 0100 0000 0000 0000 21e2 ae0f b85f de7b ........!...._.{
00000010: b246 ed90 194f 601e 041b 3c8a c6e9 37b1 .F...O`...<...7.
00000020: 878b d8e0 e796 a098 1800 0000 0000 0000 ................
00000030: 8257 3ccc 2608 498d b6b7 5801 740f 2e4e .W<.&.I...X.t..N
00000040: 8b36 0169 9273 2c91 1800 0000 0000 0000 .6.i.s,.........
00000050: d609 6e9f 3d4a 06c1 cc6e be60 dd5d 8214 ..n.=J...n.`.]..
00000060: 05bb cadc 0bf1 e4b8 ........
```
Most parts of the two archives are identical, as expected.
But maybe a little bit too identical:
look at the IV of the file data's cipher text (offset `0x50`).
The first 8 bytes are almost identical, only the first one
has been increased by one.
Could it be that the IV is generated from the clear text data?
Let's try with some other data:
```console
$ echo -n '32345678' > flag.txt
$ ./archiver.exe create password 32345678.db flag.txt
$ xxd 32345678.db
00000000: 0100 0000 0000 0000 21e2 ae0f b85f de7b ........!...._.{
00000010: b246 ed90 194f 601e 041b 3c8a c6e9 37b1 .F...O`...<...7.
00000020: 878b d8e0 e796 a098 1800 0000 0000 0000 ................
00000030: 8257 3ccc 2608 498d b6b7 5801 740f 2e4e .W<.&.I...X.t..N
00000040: 8b36 0169 9273 2c91 1800 0000 0000 0000 .6.i.s,.........
00000050: d709 6e9f 3d4a 06c1 fcd2 be2a c42f bdf1 ..n.=J.....*./..
00000060: 43e8 9879 eb86 bf95 C..y....
```
Again, patching slightly the `n`th byte of the clear text only
patched slightly the `n`th byte of the IV.
I played with some values and noticed that the operation performed
is actually a xor:
```console
>>> 0xd7 ^ ord('3')
228
>>> 0xd6 ^ ord('2')
228
>>> 0xd5 ^ ord('1')
228
```
So the clear text is xored with a key to generate the IV,
but I do not know the said key, which seems to be derived
from the archive password.
## Known plaintext
Or do I ?
Remember that I know that filename is `flag.txt`, and that I have
an associated ciphertext.
With a little bit of luck, the IV of the filename cipher text
is generated with the key:
```console
>>> 0x82 ^ ord('f')
228
```
Looks like it does.
Since I have a known plaintext and the matching cipher text,
I can simply xor the plaintext with the filename IV to recover
the xor key,
then apply the same key to the file data IV to recover the
plain text:
```python
#!/usr/bin/env python3
"""Recover the flag of result.fcsc with a known-plaintext xor.

The first 8 bytes of each encrypted field are the plaintext xored with a
fixed key.  The file name (`flag.txt`) is known, so xoring it with the
file-name field yields the key, which then decrypts the file-data field.
Only the standard library is needed.
"""


def xor(left: bytes, right: bytes) -> bytes:
    """Return the byte-wise xor of two byte strings of equal length."""
    if len(left) != len(right):
        raise ValueError("xor operands must have the same length")
    return bytes(a ^ b for a, b in zip(left, right))


filename = b'flag.txt'
# First 8 bytes of the encrypted file name (offset 0x30 of result.fcsc).
IV = b'\xd4\x4a\xb9\x6f\xea\x76\x8c\x55'
# First 8 bytes of the encrypted file data (offset 0x50 of result.fcsc).
c = b'\xf9\x6d\xbe\x51\x95\x6d\x93\x42'
key = xor(IV, filename)
flag = xor(c, key)
print('FCSC{' + flag.decode() + '}')
```
```console
$ ./solve.py
FCSC{KKfYQogc}
```

@ -1,786 +0,0 @@
---
title: "Decompiling a nanomites based VM back to C | Megalosaure @ FCSC 2024"
date: "2024-04-14 22:00:00"
author: "Juju"
tags: ["Reverse", "Writeup", "fcsc"]
toc: true
---
# Intro
Yes, this is the third year in a row that I write up the dinosaur reverse challenge.
But this time it is neither a math nor a puzzle challenge.
We are instead met with a program that takes 20 minutes to validate the input and forks tens of thousands of processes.
{{< image src="/megalosaure/meme.jpg" style="border-radius: 8px;" >}}
## Challenge description
`reverse` | `487 pts` `6 solves` `:star::star::star:`
```
Voici un binaire qui vérifie si ce qu'on lui passe est le flag. À vous de jouer !
```
Author: `Cryptanalyse`
## Given files
[megalosaure](/megalosaure/megalosaure)
# Writeup
## Overview
Nothing out of the ordinary at the first look.
```console
$ file megalosaure
megalosaure: ELF 64-bit LSB pie executable, x86-64, version 1
(SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2,
BuildID[sha1]=b8bd171568d3bd03eca826edb869205684411dab, for GNU/Linux 3.2.0,
stripped
```
Dynamic analysis, however, is another story: the binary first tells us to add a
capability to it, so `strace` and `gdb` will require higher
privileges to not drop said capability.
Running `strace` shows that the program starts by creating about ten thousand pipes before prompting for the flag. Inputting
a correctly formatted `FCSC{...}` flag will then cause the program
to fork endlessly for about 20 minutes before refusing the flag.
## Code analysis
### Main function
Here is a decompiled main function.
We can see that the code creates the pipes in `setup_process_limit_and_IPC`,
then creates a shared memory mapping.
It will then ask for the flag, check its format, and split the
input into `0x12` `uint32_t`.
These `int` are then xored into the shared memory by groups of two
and the same function is run `0x2c` times for each group, but more
on this later.
Once this is done, the program saves some bytes elsewhere in the shared
memory and shifts the global shared memory pointer,
before doing the same thing for the next 2 `int` in the input.
The final check is simply an equality test of all the saved values
mentioned above against a hardcoded reference array.
```c
/* Pointer into the shared mapping; main() moves it forward by 0x2c words per input block. */
uint32_t* shared_mem = nullptr;

/*
 * Decompiled entry point (identifiers were chosen by hand while reversing).
 *
 * Sets up the pipes and the shared mapping, reads the flag and checks its
 * format, then consumes the input two uint32_t at a time: each pair is xored
 * into shared_mem[0] and shared_mem[1], the 0x2c pods are run, and 8 bytes of
 * result are saved further in the mapping.  The saved results are finally
 * compared against the hardcoded `ref` array.
 */
int32_t main(int32_t argc, char** argv, char** envp)
{
    setup_process_limit_and_IPC();
    /* prot 3 / flags 0x21: presumably PROT_READ|PROT_WRITE and MAP_SHARED|MAP_ANONYMOUS */
    shared_mem = mmap(nullptr, 0x100000, 3, 0x21, 0xffffffff, 0);
    if (shared_mem == -1)
    {
        perror("mmap");
        exit(1);
    }
    int32_t* shared_mem_original = shared_mem;
    puts("Enter the flag:");
    char input[0x46];
    memset(&input, 0, 0x46);
    if (read(0, &input, 0x46) <= 0)
    {
        perror("read");
        exit(1);
    }
    if (check_format(&input) != 0)
    {
        puts("Wrong flag format!");
        exit(1);
    }
    uint32_t (* input_ints)[0x12] = &input;
    for (int32_t i = 0; i <= 0x11; i += 2)
    {
        shared_mem[0] = (shared_mem[0] ^ input_ints[i]);
        shared_mem[1] = (shared_mem[1] ^ input_ints[i + 1]);
        for (int32_t j = 0; j < 0x2c; j += 1)
            start_pod(pod_infos[j].n_children, code, 9);
        /* save the 8 output bytes of this block at byte offset 0x800 + (i / 2) * 8 */
        *(uint64_t*)(((((i + (i >> 0x1f)) >> 1) + 0x100) << 3) + shared_mem_original)
            = *(uint64_t*)(shared_mem + 0xb0);
        shared_mem = &shared_mem[0x2c];
    }
    shared_mem = shared_mem_original;
    int64_t lose = 0;
    for (int32_t i = 0; i <= 8; i++)
        /* compare the i-th saved 8-byte result (byte offset 0x800 + i * 8) with ref[i] */
        lose = (lose | (ref[i] ^ *(uint64_t*)(shared_mem + 0x800 + (i << 3))));
    if (lose != 0)
        puts("Nope.");
    else
        puts("Win!!");
    if (munmap(shared_mem, 1) != 0xffffffff)
        return 0;
    perror("munmap");
    exit(1);
}
```
### Check format
Let's take a quick look at the `check_format` function:
```c
/*
 * Flag-format check (decompiled).
 * Seeds the first three words of the shared memory with the first uint32_t of
 * the input and two constants, runs the 5-children `check_bytecode` pod, and
 * returns the OR of those three words afterwards.
 * The result is 0 only when all three words are 0; main() treats any
 * non-zero value as a wrong format.
 */
uint64_t check_format(int32_t* input)
{
shared_mem[0] = input[0];
shared_mem[1] = 0x1337;
shared_mem[2] = 0xa4e1a60a;
/* NOTE(review): decompiler rendering; the arguments do not line up with the start_pod prototype shown in this writeup. */
start_pod(5, check_bytecode, 0x78 / 10);
return 0 | shared_mem[0] | shared_mem[1] | shared_mem[2];
}
```
We can see that it initializes the shared memory with the first
`uint32_t` of the input, then calls the same `start_pod` function
as in `main`.
### Start pod
The `start_pod` function takes as first parameter what I called a
`pod_info` struct, which is just two `uint32_t`, the first one
is the number of children the pod will fork, the second one is an
offset in some array of `uint16_t` I called `code` given in
parameter, you will understand the name really fast once we check
the `child` function.
The last parameter is the size of a single `code` block given to
`child`. Thus offsetting by this much between each `fork`.
In my terminology, a `pod` is a complete run of all the children
denoted by their `pod_info` and associated `code`.
```c
/*
 * Run one pod (decompiled): fork pod_info[0] children, then wait for them.
 *
 * pod_info[0]  number of children to fork
 * pod_info[1]  index of the pod's first code block inside `code`
 * code         array of uint16_t bytecode
 * code_size    number of uint16_t per child code block
 *
 * Returns the last value returned by wait(), i.e. the first one that is <= 0.
 */
__pid_t start_pod(int32_t pod_info[2], uint16_t* code, int64_t code_size)
{
for (int32_t i = 0; i < pod_info[0]; i = (i + 1))
{
pid_t pid = fork();
/* 0xffffffff is presumably the decompiler's rendering of -1 (fork failure) */
if (pid == 0xffffffff)
{
perror("fork");
exit(1);
}
if (pid == 0)
{
/* in the child: interpret the i-th code block of this pod */
child(&code[(i + pod_info[1]) * code_size]);
/* no return */
}
}
__pid_t i;
/* in the parent: keep calling wait() until it stops returning a positive value */
do
{
i = wait(nullptr);
} while (i > 0);
return i;
}
```
### Child
We are met with a `while true` loop, which selects a `uint16_t`
from the `code` array and dispatches it in a huge switch.
I immediately recognize the pattern of a virtual machine,
and start identifying the instruction pointer `ip` and the
`stack` by looking at the first few instructions of the switch.
I will not show how I reversed all the instructions as many of
them are really similar, but I will show the interesting ones.
```c
/*
 * Bytecode interpreter run by every forked child (decompiled, abridged).
 * Fetches one uint16_t opcode per iteration from `code` and dispatches it in
 * a switch; the handlers work on a local stack of 0x400 uint32_t.
 * Never returns: the process exits with status 0 or 1.
 */
void child(uint16_t* code) __noreturn
{
    uint32_t stack[0x400];
    memset(&stack, 0, 0x1000);
    int32_t next_ip = 0;
    int32_t sp = 0;
    uint16_t opcode;
    while (true)
    {
        /* fetch the opcode and advance; handlers read their operands through next_ip */
        int32_t ip = next_ip;
        next_ip = ip + 1;
        opcode = code[ip];
        switch (opcode)
        {
            case 0x0:
            {
                ...
            }
            case 0x1:
                ...
        }
    }
    if (opcode != 0x12)
        exit(1);
    exit(0);
}
```
## Instruction set analysis
### Push
First let's look at opcodes `0x1` and `0x2`.
These are how I recognized and was able to rename the stack
memory and stack pointers.
We can see that the first instruction takes one operand right
after the opcode, it then increments the stack pointer, fetches
an `uint32_t` from the shared memory, indexed by the first
operand, and stores it in the stack.
Basically a `push mem` instruction
The second one is really similar but takes two immediate operands,
both operands are `uint16_t` but they are packed as a single
`uint32_t` and stored on the stack, so this is the `push imm`
instruction
```c
case 1:
{
int32_t operand_ptr = next_ip;
next_ip = (operand_ptr + 1);
int32_t old_sp = sp;
sp = old_sp + 1;
stack[old_sp] = shared_mem[code[operand_ptr]];
break;
}
case 2:
{
int32_t operand2_ptr = next_ip + 1;
int64_t operand1_ptr = next_ip;
next_ip = operand2_ptr + 1;
int32_t old_sp = sp;
sp = old_sp + 1;
stack[old_sp] = code[operand1_ptr] | (code[operand2_ptr] << 0x10);
break;
}
```
### Pop
This is the inverse operation, takes an `uint32_t` from the stack
and stores it in the shared memory indexed on the instruction's
operand.
```c
case 4:
{
int32_t operand_ptr = next_ip;
next_ip = operand_ptr + 1;
uint32_t operand = code[operand_ptr];
sp = sp - 1;
int32_t val = stack[sp];
stack[sp] = 0;
shared_mem[operand] = val;
break;
}
```
### Add
I will show only a single arithmetic instruction, all the others
work in a similar way:
This one pops two operands from the stack, add them together, and
stores the result back on the stack.
So we now know that this VM is stack based, similar to `python`
or `WASM` bytecode, operands and result of each instruction are
fetched and stored from/on the stack.
```c
case 6:
{
int32_t first_op_ptr = (sp - 1);
int32_t stack_op = stack[first_op_ptr];
stack[first_op_ptr] = 0;
int32_t second_op_ptr = first_op_ptr - 1;
int32_t stack_op2 = stack[second_op_ptr];
stack[second_op_ptr] = 0;
sp = second_op_ptr + 1;
stack[second_op_ptr] = stack_op2 + stack_op;
break;
}
```
### IPC
Before doing more work, two other instructions are really
important, check the code first:
```c
case 3:
{
    /*
     * Write instruction: pops one value from the stack and writes it to
     * every pipe listed after the count operand.
     */
    int32_t operand_ptr = next_ip;
    next_ip = (operand_ptr + 1);
    /* number of pipe operands that follow the opcode */
    uint32_t operand_1 = code[operand_ptr];
    sp = sp - 1;
    int32_t val = stack[sp];
    stack[sp] = 0;
    for (int32_t i = 0; i < operand_1; i++)
    {
        int32_t operand_i_ptr = next_ip;
        next_ip = operand_i_ptr + 1;
        /* index [1] of the pair selected by the operand: the end passed to write() */
        if (write(pipes[code[operand_i_ptr]][1], &val, 4) == -1)
        {
            perror("write");
            exit(1);
        }
    }
    break;
}
```
This instruction takes one operand from the stack and one operand
after the opcode.
The operand encoded in the instruction is used to know how many
more operands are left.
For each of them, the instruction will write the stack operand in
the pipe corresponding to the current operand.
We can guess that this is how IPC is performed between each child.
So let's look at the read instruction:
It works in a really similar way and takes the same operands,
except that this time it will set up an epoll instance to read on
every pipe given as operand and store the `read` output on the
stack for each operand.
```c
case 0:
{
int32_t n_operands_ptr = next_ip;
next_ip = n_operands_ptr + 1;
uint32_t n_operands = code[n_operands_ptr];
int32_t epoll = epoll_create1(0);
if (epoll == 0xffffffff)
{
perror("epoll_create1");
exit(1);
}
for (int32_t i = 0; i < n_operands; i++)
{
int32_t n_operands_i_ptr = next_ip;
next_ip = n_operands_i_ptr + 1;
int32_t fd = pipes[code[n_operands_i_ptr]][0];
int32_t epoll_event = 1;
int64_t var_1100_1 = fd | (i << 0x20);
if (epoll_ctl(epoll, 1, fd, &epoll_event) != 0)
{
perror("epoll_ctl");
exit(1);
}
}
uint32_t n_operands_cpy = n_operands;
do
{
struct epoll_event events;
int32_t nb_events = epoll_wait(epoll, &events, 1, 0xffffffff);
for (int32_t j = 0; j < nb_events; j++)
{
// Weird but basically recovers the FD from the event
int64_t fd = *(j * 0xc + &var_8) - 0x1104;
if (read(fd, &stack[(fd >> 0x20) + sp], 4) <= 0)
{
perror("read");
exit(1);
}
}
n_operands_cpy = n_operands_cpy - 1;
} while (n_operands_cpy != 0);
sp = sp + n_operands;
if (close(epoll) != 0)
{
perror("close");
exit(1);
}
break;
}
```
## Disassembling
Right, so let's not look too much at the IPC thingy.
I will start by disassembling the byte code of independent
children, then we will see if we can deduce patterns.
So I implemented a `binaryninja` plugin (my decompiler of choice) for the VM.
{{< code file="/static/megalosaure/src/plugin/__init__.py" language="python" >}}
Remember the `start_pod` and `check_format` functions ?
The check format passed a specific byte code to only 5 children.
This is probably a good first look
Here is how the plugin looked like on the check format bytecode:
{{< image src="/megalosaure/binja_plugin.png" style="border-radius: 8px;" >}}
Every function defined here is a specific child.
The first one pushes the first `uint32_t` of the `shared_memory`
(I wrote this as `m[0x0]` in the disassembler)
on the stack, then pops it and writes it on the first pipe (`r0x0`).
I consider pipes as registers.
The second child does the same but with `m[0x1]` and `r0x1`.
Third child reads `r0x0`, then `r0x1`, multiplies the two values
and writes the result to `r0x2`
Fourth child reads `r0x2`, pushes `m[0x2]`, xor both values,
and writes the result to `r0x3`.
Finally, the last child reads `r0x3`, duplicates the value on the
stack twice and pops them all into `m[0x0]`, `m[0x1]` and `m[0x2]`.
If we look again at the `check_format` function:
```c
/*
 * Flag-format check (decompiled), repeated here for reference.
 * Seeds shared_mem[0..2] with the first uint32_t of the input and two
 * constants, runs the 5-children `check_bytecode` pod, and returns the OR of
 * the three words afterwards (0 only when all three are 0).
 */
uint64_t check_format(int32_t* input)
{
shared_mem[0] = input[0];
shared_mem[1] = 0x1337;
shared_mem[2] = 0xa4e1a60a;
/* NOTE(review): decompiler rendering; the arguments do not line up with the start_pod prototype shown in this writeup. */
start_pod(5, check_bytecode, 0x78 / 10);
return 0 | shared_mem[0] | shared_mem[1] | shared_mem[2];
}
```
It checks that once the pod has executed, `m[0:3]` is all `0`.
Working backwards, it means that the result of the
xor must be 0, thus `input[0] * 0x1337 == 0xa4e1a60a` (modulo `2**32`).
This small script computes the modular inverse to retrieve
`input[0]`:
```python
#!/usr/bin/env python3
"""Invert the multiplication performed by the check_format bytecode.

The pod checks that ``input[0] * 0x1337 == 0xa4e1a60a`` on 32-bit words, so
``input[0]`` is the target times the modular inverse of 0x1337 modulo 2**32.
"""
import struct

# The VM works on uint32_t values, so all arithmetic is modulo 2**32.
N = 2**32


def reverse(desired_out, mult):
    """Return x in [0, N) such that (x * mult) % N == desired_out % N.

    Raises ValueError when mult has no inverse modulo N (i.e. mult is even).
    """
    # Built-in modular inverse (Python 3.8+); no third-party package needed.
    return (desired_out * pow(mult, -1, N)) % N


first = reverse(0xa4e1a60a, 0x1337)
print(struct.pack('<L', first))
```
With this output:
```console
$ ./invert.py
b'FCSC'
```
Good, we are definitely on the right track.
## Lifting
Great but now if we look at real pods launched for the flag checking,
they contain thousands of children, and have 2 inputs instead of one
(given through `m[0x0]` and `m[0x1]`)
We need to do something smart.
We noticed in the `check_format` example that children essentially
recover one or two inputs (from memory, immediate, or register),
perform a single operation, and output the result to a register
or memory.
Looking back at the `code_size` given to `start_pod` in the `main` function, we can see that there are at most 9 instructions per child.
So it is unlikely that the real check children can do much more
than take inputs, compute a single operation and send its outputs.
The `binja` plugin must be improved, and we will throw away the
binja part actually.
Instead of disassembling independent children, I need to disassemble
a whole pod.
### Creating an AST
First thing we can build is each child's register dependencies.
I will simply mark which registers the child reads from and which
ones he writes to.
Now, knowing by which register a child is "locked" by reading
and which one he "unlocks" by writing, I can build the dependency
graph of all children.
To do that I implemented a simple algorithm which marks locked
and ready registers and by which child a register was unlocked.
Any child wanting to read a register will be able to do so only
if it is unlocked, if it is, I will give the current child a
reference to the child which originally unlocked the register
it is trying to read. The register will thus be consumed by the
child and be marked as locked again.
Any child which wants to write to a register will simply unlock
the register and mark itself as the one which unlocked it.
Obviously, this can only be done if all the child's input registers
were consumed, otherwise, the child is still waiting for its
input and cannot write its output.
We do this in a loop until all children have been scheduled.
Inspecting the built graph, I quickly notice that all children
converge to a single output child and that there is no circular
dependency. The graph is thus an AST.
Each node of the AST performs and outputs a single operation based
on one or two inputs registers.
The leaves of the AST do not have dependencies, they simply
take inputs from immediate values or shared memory.
I also notice that the root of the AST has a single input,
which is simply output to shared memory.
Further analysis will show me that in the AST of every pod, given
pod number `n`:
* Only the root child outputs to memory, and at index `n+2`
* Only the leaves read from memory, at indexes `n` and `n+1`
### Recalling the objective
As a reminder, here is the for loop which computes the result
tested against the `ref`.
```c
uint32_t (* input_ints)[0x12] = &input;
for (int32_t i = 0; i <= 0x11; i = (i + 2))
{
shared_mem[0] = (shared_mem[0] ^ input_ints[i]);
shared_mem[1] = (shared_mem[1] ^ input_ints[i + 1]);
for (int32_t j = 0; j < 0x2c; j++)
start_pod(pod_infos[j].n_children, code, 9);
*(uint64_t*)(((((i + (i >> 0x1f)) >> 1) + 0x100) << 3) + shared_mem_original)
= *(uint64_t*)(shared_mem + 0xb0);
shared_mem = &shared_mem[0x2c];
}
```
The output is recovered from `shared_mem[0x2c]` (`0xb0` is `0x2c * 4`) on 8 bytes, which are the outputs of the last two pods.
So we have `0x2c` pods, each one outputting the inputs for the next
one.
Once all pods have run, notice that we shift `shared_mem` by `0x2c`,
thus right onto the last pods' output, which will be xored with
the next input for the next run of `0x2c` pods.
This looks like a `CBC` mode of operation, but I did not make any
link to block ciphers at that time.
I will split the problem by solving each block of 8 input bytes
independently.
So I have a reference `uint64_t`, I want to find the two
`uint32_t` which will give this output after passing in all
of my `0x2c` ASTs.
### Do the instructions backward :clown:
I thought about simply taking the desired output and inverting
every operation since I have the complete AST. However I quickly
noticed it was not possible because of operations like `shl`, `shr`, `or` and `and`.
These operations plus the fact that our inputs are fetched from
multiple leafs of the AST make the whole thing close to
impossible to invert.
### z3 attempt
This is actually not the attempt I made first but I went back and
forth on many ideas so I will explain my failed ideas here so
it doesn't cut the flow of the rest of the writeup.
So at some point I tried to build a z3 solver by traversing the
AST.
It did not work out in the end because I found a promising
solution which was showing results in parallel.
Now I know that it didn't find anything because I built the
solver by traversing all the `0x2c` ASTs, which is too much
obviously.
Basically my mistake was that at the time, I didn't know that
the VM was a symmetric cipher, thus I had no idea that the input
was unique. So I thought that I NEEDED to add a constraint
on the first input `uint32_t` (which I knew was `FCSC`) to
find a single solution.
But now I know that the input of every AST is unique so
solving ASTs one by one is much easier.
### Lifting to C
My actual first idea was that I knew that the flag started with
`FCSC{`, which left me with only 3 unknown bytes in the first block.
This would be fairly trivial to bruteforce if the VM did not need
3 minutes to compute a single block.
I could have implemented an interpreter on top of the AST, but
since I decided to go for the bruteforce solution, I went for it
all and transpiled it to C.
{{< code file="/static/megalosaure/src/disasm.py" language="python" >}}
Running it will give this output, and a file `megalosaure.c`
```console
$ ./disasm.py
[*] '/home/juju/ctf/fcsc_2024/reverse/megalosaure/megalosaure'
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: No canary found
NX: NX enabled
PIE: PIE enabled
[+] Loading virtual machines
[+] Lifting AST
[+] Transpiling to C
[+] Transpiled to ./megalosaure.c
```
The `megalosaure.c` file is an implementation of a single run of
all the `0x2c` pods.
If you are interested, the `disasm.py` script also contains the code
of my z3 attempt.
## Bruteforcing until we win
### First block
The first block is trivial to bruteforce so I implemented a
simple bruteforce c program which links against a heavily optimised `megalosaure.c`.
{{< code file="/static/megalosaure/src/simple.c" language="c" >}}
With the following `Makefile` (which also has the final targets for the final
solver)
{{< code file="/static/megalosaure/src/Makefile" language="makefile" >}}
You can run `make simple` to build this simple bruteforcer for the first block.
```console
$ ./simple
FCSC{454
```
Great I have the first 8 bytes of the flag. Now what ?
This strategy will not work on other blocks, where all of the 8
bytes are unknown.
### Angr attempt
So since I had the source code, I thought that I could try angr
on this one, surprisingly enough, this did not give anything.
For the same reason as z3, doing all the pods at once is just
too much.
### Reducing the character set
Now things are becoming really nasty for my solver, I was
working in parallel on the z3 solver and as I ran it on my first
try, I thought
> Hey "FCSC{454" does not look like a funny string, maybe this flag is only a hexstring
So I started bruteforcing all the blocks but only on hex digits,
which comes down to 2^32 iterations per block, completely doable.
However just remember that before being fed to the first
pod, the input is xored with the output of the previous block.
Since I have the reference array, I know the desired output of
all the blocks and can bruteforce them in parallel.
Watch out, the code is dirty.
{{< code file="/static/megalosaure/src/main.c" language="c" >}}
You can run `make` to compile the solver.
It takes about 20 minutes to run, and prints each block when it
finds one.
```console
$ time ./solver
Block 6: 06a5611b
Block 1: 2d32e27c
Block 4: 4016b156
Block 8: 420ac}
Block 7: c18edd32
Block 3: d3418e7a
Block 2: de2d7cf7
Block 5: e4df7f0c
real 21m18,662s
user 107m44,082s
sys 0m0,936s
```
I then reconstituted the flag manually by pasting each block
`FCSC{4542d32e27cde2d7cf7d3418e7a4016b156e4df7f0c06a5611bc18edd32420ac}`
After solving the challenge and discussing with its author,
I learned that the VM actually implemented a symmetric block cipher (SIMON-64-128), with a null IV, and CBC mode of operation.
The key was embedded in the code, so it was actually a whitebox.
Looking back at everything, we can clearly see that one pod is
actually a round of encryption, a block is encrypted through
`0x2c` rounds, with each block input being xored with the output
of the previous block (0 for the first block), thus the CBC and
null IV.

@ -1,431 +0,0 @@
---
title: "Lifting a reloc based VM | Svartalfheim @ FCSC 2024"
date: "2024-04-14 22:00:00"
author: "Juju"
tags: ["Reverse", "Writeup", "fcsc"]
toc: true
---
# Intro
Svartalfheim is a weird reversing challenge. It seems like a simple x64 ELF with
only a few bytes of machine code, but after playing with it, you might notice
some quantum behaviours. The program might be patching itself when you are
not looking at it, so stay alert :eyes:.
## Challenge description
`reverse` | `467 pts` `14 solves` `:star::star::star:`
```
Trouvez le flag accepté par le binaire.
```
Author: `Quanthor_ic`
## Given files
[svartalfheim](/svartalfheim/svartalfheim)
# Writeup
## Overview
Things are already weird without opening up any disassembler:
`file` tells us that the binary is dynamically linked but `ldd` says otherwise.
```console
$ file svartalfheim
svartalfheim: ELF 64-bit LSB executable, x86-64, version 1 (SYSV),
dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, no
section header
$ ldd svartalfheim
not a dynamic executable
```
Here is the decompiled code of the entrypoint.
{{< code file="/static/svartalfheim/main.c" language="c" >}}
Basically, it simply deletes a file named `_` from the current directory, then
re-creates it and opens it in write mode.
The process then dumps itself into the opened file, closes it and `execve`s the
dumped file.
Cool so this should do absolutely nothing except calling itself endlessly.
But what if we give it a try:
```console
./svartalfheim
Welcome to Svartalfheim
FCSC{test}
Nope
```
WTF is going on ?
## Quantum binary
So first I wanted a way to see what was happening between each execution.
I patched the `execve` with a breakpoint in the binary.
It will now crash instead of starting itself again and I can inspect the
`_` file before the next instance deletes it.
Here is the diff of the hexdump of the patched binary (breakpoint instead
of execve) with the hexdump of the `_` file after a single execution:
```console
$ diff svartalfheim_breakpointed.hex first_run.hex
517c517
< 00002040: 0700 0000 0000 0000 e821 0400 0000 0000 .........!......
---
> 00002040: 0700 0000 0000 0000 3037 0400 0000 0000 ........07......
519c519
< 00002060: 0800 0000 0000 0000 3000 0000 0000 0000 ........0.......
---
> 00002060: 0800 0000 0000 0000 6000 0000 0000 0000 ........`.......
```
A total of 3 bytes have changed, so I go check the corresponding addresses
in my decompiler:
{{< image src="/svartalfheim/rela_patched.png" style="border-radius: 8px;" >}}
The first two bytes that are patched are the relocation table addr inside
the dynamic table.
The last byte patched is the size of said relocation table.
This means that at the next execution, the program will have different relocations.
Maybe we should take a look at the original relocation table:
{{< image src="/svartalfheim/original_relocs.png" style="border-radius: 8px;" >}}
Unusual relocations indeed. So the first one points to the relocation table
address inside the dynamic table and the second one to the relocation table
size, also in the dynamic table, the two values that were patched in the next
binary. We can already guess that the relocation table will be patched at
every execution, running new relocations every time, just like a processor
run instructions and increments its program counter. This might be a
relocation based virtual machine.
## Reloc based virtual machine
### Figuring out the instruction set
So since the relocation table has changed in the next binary, let's open this
one and check the new relocation table:
{{< image src="/svartalfheim/second_relocs.png" style="border-radius: 8px;" >}}
Once again we can see relocs pointing to DT_RELA and DT_RELASZ values, but there are also two other addresses that are patched.
When looking them up, we can see that these two addresses are located inside
the symbol table. To be precise, the values of symbol `1` and `2` are patched.
Below is the corresponding symbol table:
{{< image src="/svartalfheim/second_symtab.png" style="border-radius: 8px;" >}}
Great so now let's run the binary a second time and inspect the third relocation table.
I will stop on this one a bit longer because it actually contains the entire
instruction set.
But first, what is *really* happening ?
The relocation table holds ... relocations indeed.
Relocations are applied by `ldso` when a process is loaded into memory
(so at `execve`).
Applying a relocation has different effects depending on the relocation type
(denoted in the lower 32 bits of the `info` field of the reloc).
However, most relocation types imply dereferencing the `value` of a symbol (see the above screenshot for an example of the symbol table) and storing it in the relocation address.
The said symbol is denoted by the higher 32 bits of the `info` field of the reloc.
Now let's really look at the third relocation table and run it in our mind:
{{< image src="/svartalfheim/third_relocs.png" style="border-radius: 8px;" >}}
The first relocation is of type `0x8`, and has symbol `0x0` (which means no symbol).
It points to the address of the `value` of symbol `0x5`.
Relocation type `0x8` will simply put its `addend` value at the address pointed by its `addr` field. Thus storing `0xff` in the `0x5` symbol `value`
Basically this relocation is a `mov mem, imm` instruction.
Second relocation is of type `0x1` and symbol `0x1`.
This relocation will take the `value` of symbol `0x1`, add the reloc
`addend` value, and store the result at the relocation `addr`
So it looks like some sort of `add mem, reg, imm` instruction, considering
symbols as registers.
I'll do the third relocation and we will have the whole instruction set:
The type is `0x5`, symbol `0x1`. It will take the value of the corresponding symbol, dereference it, and store it in the reloc `addr`.
The assembly for this might look like `mov mem, [reg]`
Here we go, that's it, an instruction set of 3 instructions,
there isn't even an instruction to branch or to add two registers together.
Let's write an interpreter for the VM so we can debug it.
### Writing the interpreter
Basically the interpreter will have multiple roles in the analysis:
* Get an execution trace and disassembly of the virtual machine
* Set breakpoints during execution
* Dump process to inspect the patched ELF at any stage easily and fast
{{< code file="/static/svartalfheim/interpreter.py" language="py" >}}
This interpreter stops every time that the VM patches the native code section
of the binary, this way I can stop whenever IO is performed, dump the binary
and analyse it.
The VM patches the native code a total of 7 times:
* Setup a syscall to write the prompt on stdout
* Immediately after, reset the native code to its original content
* Setup a syscall to read the flag from stdin
* Immediately after, reset the native code to its original content
* Setup a syscall to write the flag validation
* Immediately after, reset the native code to its original content
* Setup a syscall to exit the program instead of the `execve` it again
Investigating the third dumped binary will show us the flag address given to `read`, which will allow us to inject it in our interpreter:
{{< image src="/svartalfheim/radare_read.png" style="border-radius: 8px;" >}}
The interpreter also builds a disassembled execution trace:
I tried to make it readable as if it was intel assembly.
I added some comments for easier analysis:
* NATIVE CODE LOADING means that this block (a complete run of a single relocation table) has patched the native code section
* The commented hexstring is the data that is being written to the destination operand
* PATCHING CODE means that this instruction has a destination address pointing to the next instruction, meaning it is trying to patch its own code
* PATCHING FAR is the same but on an instruction of the same block but not the next one
These were really helpful during analysis to have a reminder to check for code patching.
You might be saying that the example assembly below doesn't correspond to the
instruction set defined above, as there was no such instruction as
`add reg, reg, imm`. That is indeed true, but the trick is that all registers
are memory mapped (since they are simply symbols in the symtab of the ELF), so a memory deref can actually be a register and my disassembler lifts this.
```console
0x48080: mov [0x480f0], $0xffffffffffffffa0 # a0ffffffffffffff PATCHING FAR
0x48098: add [0x480b0], r4, $0x480d8 # f080040000000000 PATCHING CODE
0x480b0: mov [0x480f0], $0x0 # 0000000000000000 PATCHING FAR
0x480c8: add r1, r1, $0x0 # 1036040000000000
0x480e0: add r1, r1, $0x0 # 1036040000000000
0x480f8: add r1, r1, $0x10 # 2036040000000000
0x48110: mov DT_RELA, [r1].8 # 5881040000000000
0x48128: add r2, r1, $0x8 # 2836040000000000
0x48140: mov DT_RELASZ, [r2].8 # 7800000000000000
#End of block
0x48158: mov r7, $0x5 # 0500000000000000
0x48170: add r1, r1, $0x10 # 3036040000000000
0x48188: mov DT_RELA, [r1].8 # d081040000000000
0x481a0: add r2, r1, $0x8 # 3836040000000000
0x481b8: mov DT_RELASZ, [r2].8 # f000000000000000
#End of block
# NATIVE CODE LOADING:
0x481d0: mov [0x4100e], $0xba48 # 48ba000000000000
0x481e8: mov [0x41016], $0xbe480000 # 000048be00000000
0x48200: mov [0x4101e], $0x6a5f016a00000000 # 000000006a015f6a
0x48218: mov [0x41026], $0x90909090050f5801 # 01580f0590909090
0x48230: add [0x41010], r7, $0x0 # 0500000000000000
0x48248: add [0x4101a], r8, $0x0 # 23a2050000000000
0x48260: add r1, r1, $0x10 # 4036040000000000
0x48278: mov DT_RELA, [r1].8 # c082040000000000
0x48290: add r2, r1, $0x8 # 4836040000000000
0x482a8: mov DT_RELASZ, [r2].8 # c000000000000000
#End of block
```
### Side channel attempt
My first attempt at a solver was really simple, I thought that maybe the
VM would check bytes one by one.
So I added a method to inject the flag into my interpreter's memory and tried
to bruteforce the first char, watching for the length of the execution trace every time.
But all 256 possible bytes gave the same number of instructions
At this point I was thinking about lifting the code a little more to reduce
the trace size before reading it, but I was in the mood to read 65000+ lines of the same 3 instructions.
## Analysing the execution trace
Just kidding I did not actually read the whole execution trace.
I knew that the flag starts with `FCSC{`, so I dumped 2 execution traces:
* One with the flag as `FCSC{test`
* The other one with `HCSC{test`
I put them side by side (`FCSC{` on the left, `HCSC{` on the right), jumped right after the `read` call, and started comparing them, and lifting the code on paper.
{{< image src="/svartalfheim/trace_read.png" style="border-radius: 8px;" >}}
I will not show you the whole execution trace to keep your eyes safe, but
the VM starts by a bunch of `mov reg, imm` instructions to initialize some
variables.
And then a fun pattern appears:
### `add mem, reg, reg`
{{< image src="/svartalfheim/add_reg_reg.png" style="border-radius: 8px;" >}}
These three instructions together can actually be lifted to `add r13.b, r6, r13`
It is crucial to understand this simple pattern before we continue to how
branching is handled in this VM.
Start by looking at the first instruction: it takes the value of `r13`, adds 0,
and stores it at `0x45a50`. The first thing to notice is that `add mem, reg,
$0x0` is actually equivalent to a `mov mem, reg`, but there is no such
instruction in our instruction set (`mov mem, [reg]` will deref the reg), hence the add instruction trick.
Then if you look at the destination address, it points to the next instruction's `addend`. Looking at the comment, we know that the output value is `0x0` on 8 bytes.
So now when we look at the second instruction, it is indeed `add r13, r6,
$0x0`, but said immediate `$0x0` was patched by previous instruction, with the
value of `r13`, even if the instruction add an immediate, in this context,
the immediate was patched with a register. Thus performing a `add mem, reg, reg`
The third instruction simply zeroes out the 7 higher bytes of the `r13`
register, my disassembler did not lift the addresses of the higher bytes of
regs but trust me on this one.
The comment on the second instruction shows us that the addition had a result of `0x1` (64 bits little endian) (`r6` had value 1), so these 3 instructions simply increment `r13`.
### Lookup tables
I skipped a few instructions, all you need to know is that `r3` points
to the first byte of the flag, and that `r12` was initialized with a byte
coming from an array indexed with the same counter as the flag.
{{< image src="/svartalfheim/lut_lookup.png" style="border-radius: 8px;" >}}
The first block simply loads the current flag byte in `r11` (`0x46` = `'F'`)
and the second one basically substitutes the byte from the flag based on a lookup table.
The LuT is indexed based on the flag byte and `r12`, which I assume is some
sort of nonce to add the information of the position of the byte in the
flag during the substitution.
Here is a c equivalent I lifted on paper:
```c
uint64_t r12 = nonce[i];
uint64_t r11 = flag[i];
uint64_t r3 = (r11 << 32) + r12;
uint64_t *LuT = 0x49000;
r11 = *(LuT + r3);
```
The next few blocks are not that important, they store the LuTed byte in memory, increment string iterators and decrement size counters
But then comes the one most important code pattern of this VM:
### Branching
{{< image src="/svartalfheim/for.png" style="border-radius: 8px;" >}}
These two blocks perform a branch
It essentially is a `test r7, r7; jne mem`
Here is how it works after lifting:
```c
// First block
jump_table = 0x59000;
r4 = jump_table[r7]; // r7 is remaining flag len
// Second block
r1 = r1 + 0x10 - 0xf0 + r4;
// DT_RELA = r1, r1 is the program counter
```
So `0x59000` contains a jump table indexed on the remaining flag size.
Here you can see at instruction `0x46628`, the jump offset is in the comment.
I noticed it changed when `r7` reached 0.
It is essentially the first `for` loop iterating on the flag, applying lookup
tables on each byte.
### Flag checking
After that there is a really similar block of code, also performing lookups
of some sort I did not really bother to understand (as the ones of the
previous step) because I found a really interesting branch which was not a loop.
I noticed a pattern similar to the for loop above, slightly different but
still some kind of jump table.
What struck me is that, as you can see on the screenshot below, it was
the first time in the whole execution trace that my purposely wrong flag ran
to a different branch than the one starting with `FCSC{`
{{< image src="/svartalfheim/check.png" style="border-radius: 8px;" >}}
What I did not notice at first is that there are two different branches in
the screenshot (with the jump offsets marked in red). I noticed it quickly and
backported it to my solver.
I do not actually know what is the meaning of these 2 checks regarding the
previous look up tables but all I know is that I needed to hit jump
offset `0x18` twice for a flag byte to be valid.
So I modified my interpreter to add a breakpoint at the addresses marked in red,
check the value moved in `r4` is `0x18`.
And then bruteforced byte by byte:
While bruteforcing the `n`th byte, I need to hit the breakpoint successfully (with `0x18` in `r4`) `2*n` times. If the breakpoint check fails once then the byte is fucked up.
# Solver
Here is the complete solver code, with the interpreter, correct breakpoints and
bruteforcing
{{< code file="/static/svartalfheim/solve.py" language="python" >}}
```console
$ ./solve.py
bytearray(b'F\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
bytearray(b'FC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
bytearray(b'FCS\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
...
bytearray(b'FCSC{162756828312aad562394d47c854134803a092d7f5b9eb795528f4a0f16f7c65}')
$ ./svartalfheim
Welcome to Svartalfheim
FCSC{162756828312aad562394d47c854134803a092d7f5b9eb795528f4a0f16f7c65}
Well done!
```

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 92 KiB

@ -1,15 +0,0 @@
# Build rules for the megalosaure solvers.
#   make / make all : build the final brute-forcer (./solver)
#   make simple     : build the first-block brute-forcer (./simple)
#   make run        : build and run ./solver
# Recipe lines must start with a tab character.

CC := gcc
# Flags shared by both brute-forcers (same set the two rules used before).
CFLAGS := -Wno-overflow -O3 -march=native -fno-pie -no-pie

.PHONY: all run

all: ./solver

./simple: ./megalosaure.c simple.c
	$(CC) $(CFLAGS) simple.c megalosaure.c -o simple

# megalosaure.c is generated by the disassembler/transpiler script.
./megalosaure.c: ./disasm.py
	./disasm.py

./solver: ./megalosaure.c main.c
	$(CC) $(CFLAGS) main.c megalosaure.c -o solver

run: all
	./solver

@ -1,491 +0,0 @@
#!/usr/bin/env python3
import struct
from pwn import *
from typing import Optional, List
from Crypto.Util.number import inverse
from z3 import *
# Mnemonics of the VM, listed in opcode order (0x00 .. 0x12).
# The parts after the first underscore describe the operand words that
# Instruction.disassemble reads: "i" = 16-bit immediate, "m" = memory index,
# "f" = list of registers whose length is the preceding immediate.
_MNEMONICS = (
    "pushread_i_f",   # 0x00
    "push_m",         # 0x01
    "push32_i_i",     # 0x02
    "popwrite_i_f",   # 0x03
    "pop_m",          # 0x04
    "dup",            # 0x05
    "add",            # 0x06
    "sub",            # 0x07
    "mul",            # 0x08
    "mod",            # 0x09
    "xor",            # 0x0a
    "and",            # 0x0b
    "or",             # 0x0c
    "shr",            # 0x0d
    "shl",            # 0x0e
    "not",            # 0x0f
    "neg",            # 0x10
    "nop",            # 0x11
    "exit",           # 0x12
)

# opcode number -> mnemonic
opcodes = dict(enumerate(_MNEMONICS))

registers = ["ip", "sp"]

# Modulus used for the 32-bit arithmetic in Child.invert (2**32).
N = 1 << 32
class Stream:
    """Sequential little-endian integer reader over a bytes buffer.

    Each ``read_*`` method decodes one integer at the current position and
    advances past it. When fewer bytes than required remain, the method
    returns ``None`` and leaves the position untouched.
    """

    pos: int
    buf: bytes

    def __init__(self, buf) -> None:
        self.pos = 0
        self.buf = buf

    def _read(self, fmt: str) -> Optional[int]:
        """Decode one value described by the struct format *fmt*."""
        size = struct.calcsize(fmt)
        try:
            (val,) = struct.unpack(fmt, self.buf[self.pos:self.pos + size])
        except struct.error:
            # Truncated buffer: signal end of data instead of raising.
            return None
        self.pos += size
        return val

    def read_u16(self) -> Optional[int]:
        """Read an unsigned 16-bit integer, or return None at end of data."""
        return self._read("<H")

    def read_i16(self) -> Optional[int]:
        """Read a signed 16-bit integer, or return None at end of data."""
        return self._read("<h")

    def read_u32(self) -> Optional[int]:
        """Read an unsigned 32-bit integer, or return None at end of data."""
        return self._read("<I")

    def read_i32(self) -> Optional[int]:
        """Read a signed 32-bit integer, or return None at end of data."""
        return self._read("<i")
class Operand:
    """Common base class of every instruction operand."""


class Register(Operand):
    """Register operand; its display name is ``r`` followed by the hex index."""

    index: int
    name: str

    def __init__(self, index: int) -> None:
        self.index = index
        self.name = f"r{hex(index)}"


class Mem(Operand):
    """Memory operand; its display name is ``m[<hex index>]``."""

    index: int
    name: str

    def __init__(self, index: int) -> None:
        self.index = index
        self.name = f"m[{hex(index)}]"


class Imm(Operand):
    """Immediate operand.

    ``value`` is the integer as given; ``raw`` is the same 32-bit pattern
    reinterpreted as unsigned (so -1 becomes 0xffffffff). Construction raises
    ``struct.error`` when *value* does not fit a signed 32-bit integer.
    """

    value: int
    raw: int

    def __init__(self, value: int) -> None:
        self.value = value
        # Round-trip through struct to reinterpret signed -> unsigned.
        self.raw = struct.unpack("<I", struct.pack("<i", self.value))[0]
class Instruction:
    """One decoded VM instruction: an opcode word followed by operand words.

    Every opcode and operand occupies one 16-bit word, hence
    ``size == 2 * (len(operands) + 1)`` bytes.
    """

    opcode: int
    operands: List["Operand"]
    addr: int  # declared for completeness; never assigned by this class
    name: str
    size: int

    # Mnemonic -> C operator text emitted by transpile().
    _C_OPERATORS = {
        "add": '+',
        "sub": '-',
        "mul": '*',
        "mod": '%',
        "xor": '^',
        "and": '&',
        "or": '|',
        "not": '~',
        "neg": '-',
        "shr": '>>',
        "shl": '<<',
    }

    def __init__(self, opcode, operands, opcodes) -> None:
        """Store the decoded fields; unknown opcodes are named "invalid"."""
        self.opcode = opcode
        self.operands = operands
        self.name = opcodes.get(opcode, "invalid")
        self.size = 2 * (len(operands) + 1)

    @staticmethod
    def disassemble(data: bytes) -> 'Instruction':
        """Decode the instruction found at the start of *data*.

        The operand layout is derived from the mnemonic suffixes in the
        module-level ``opcodes`` table. An unknown opcode yields an
        operand-less instruction named "invalid".
        """
        stream = Stream(data)
        opcode = stream.read_u16()
        if opcode not in opcodes:
            # Fixed: this path used to pass an undefined ``addr`` argument.
            return Instruction(opcode, [], opcodes)
        operands = []
        # Skip the base mnemonic; the remaining parts describe operands.
        for element in opcodes[opcode].split('_')[1:]:
            if element == 'i':
                operands.append(Imm(stream.read_u16()))
            elif element == 'm':
                operands.append(Mem(stream.read_u16()))
            elif element == 'f':
                # The preceding immediate gives the number of registers.
                n_regs = operands[-1].value
                for _ in range(n_regs):
                    operands.append(Register(stream.read_u16()))
        return Instruction(opcode, operands, opcodes)

    def to_string(self):
        """Return the textual disassembly: mnemonic then operands."""
        asm = self.name
        for op in self.operands:
            asm += ' '
            if isinstance(op, (Register, Mem)):
                asm += op.name
            elif isinstance(op, Imm):
                asm += hex(op.value)
        return asm

    def transpile(self):
        """Return the C fragment for this instruction.

        ``pop_m`` emits nothing, ``push32_i_i`` emits its 32-bit constant in
        decimal (second immediate is the high half), ``push_m`` emits the
        variable ``m<index>``; arithmetic mnemonics emit their C operator.

        Raises:
            NotImplementedError: for mnemonics with no C equivalent here
                (a RuntimeError subclass, as raised previously).
        """
        if self.name == "pop_m":
            return ''
        if self.name == "push32_i_i":
            val = (self.operands[1].value << 0x10) | self.operands[0].value
            return f'{val}'
        if self.name == "push_m":
            return f'm{self.operands[0].index}'
        try:
            return self._C_OPERATORS[self.name]
        except KeyError:
            raise NotImplementedError(
                f"cannot transpile instruction {self.name!r}"
            ) from None
class Child:
    """One child program of a pod, used as a node of the pod's expression tree.

    The child's code is disassembled at construction time:
    ``pushread_i_f`` registers become ``depends``, ``popwrite_i_f`` registers
    become ``unlocks``, and the single remaining instruction is ``main_instr``.
    ``inputs`` is filled by ``consume_locks`` with the children producing the
    values this child reads.
    """

    def __init__(self, pod, index):
        self.pod = pod
        self.index = index
        # Offset is counted in 16-bit words; it is doubled below to get bytes.
        self.offset = (pod.offset + index) * self.pod.code_size
        self.code = self.pod.megalosaure.elf.read(
            self.pod.code_base + self.offset * 2, self.pod.code_size * 2
        )
        self.instructions = []
        self.depends = []
        self.unlocks = []
        self.inputs = []
        # (is_forced, forced_value); consulted by invert().
        self.forced = False, 0
        self.main_instr = None
        while True:
            instr = Instruction.disassemble(self.code)
            self.code = self.code[instr.size:]
            self.instructions.append(instr)
            if instr.name == 'exit':
                break
            elif instr.name == 'pushread_i_f':
                # operands[0] is the register count, the rest are registers.
                for reg in instr.operands[1:]:
                    self.depends.append(reg.index)
            elif instr.name == 'popwrite_i_f':
                for reg in instr.operands[1:]:
                    self.unlocks.append(reg.index)
            else:
                if self.main_instr is not None:
                    raise RuntimeError(
                        f"child {index} has more than one main instruction"
                    )
                self.main_instr = instr

    def to_string(self):
        """Return the disassembly of the child, one instruction per line."""
        asm = ""
        for instr in self.instructions:
            asm += instr.to_string() + '\n'
        return asm

    def consume_locks(self, locks):
        """Consume, in order, every dependency that is currently available.

        *locks* maps a register index to ``(locked, producer)``. Each
        available dependency's producer is appended to ``inputs`` and the
        entry is re-locked. Stops at the first dependency still locked.
        """
        while self.depends != []:
            dependency = self.depends[0]
            if locks[dependency][0]:
                return
            self.inputs.append(locks[dependency][1])
            locks[dependency] = (True, None)
            self.depends.pop(0)

    def is_schedulable(self, locks):
        """Return True once every dependency has been consumed."""
        return len(self.depends) == 0

    def mark_unlocks(self, locks):
        """Publish this child as the producer of every register it writes."""
        for unlock in self.unlocks:
            locks[unlock] = (False, self)

    def invert(self, desired):
        """Best-effort inversion: propagate *desired* output down to a leaf.

        For binary operations exactly one input is expected to be forced
        (known); the other one is inverted recursively. Returns ``None``
        when neither input is forced or when the operation is not handled
        (its name is printed in that case).

        Raises:
            RuntimeError: when asked to invert a ``push32_i_i`` constant.
        """
        instr = self.main_instr
        op = instr.name
        inputs = self.inputs
        if len(inputs) == 0:
            if op == 'push_m':
                return desired
            elif op == 'push32_i_i':
                print(self.forced)
                raise RuntimeError("cannot invert a constant (push32_i_i)")
            else:
                print(op)
        elif len(inputs) == 1:
            inp = inputs[0]
            if op == "pop_m":
                return inp.invert(desired)
            if op == "not":
                return inp.invert((~desired) % N)
            else:
                print(op)
        else:
            forced0 = inputs[0].forced
            forced1 = inputs[1].forced
            # ``forced`` is a (flag, value) tuple: test the flag, not the
            # tuple itself (a non-empty tuple is always truthy).
            if not forced0[0] and not forced1[0]:
                return
            forced_child = inputs[0] if forced0[0] else inputs[1]
            unk_child = inputs[1] if forced0[0] else inputs[0]
            known = forced_child.forced[1]
            if op == 'xor':
                return unk_child.invert(desired ^ known)
            elif op == 'sub':
                # The operation computes inputs[1] - inputs[0].
                if forced0[0]:
                    return unk_child.invert((desired + known) % N)
                else:
                    return unk_child.invert((-desired + known) % N)
            elif op == 'add':
                return unk_child.invert((desired - known) % N)
            elif op == 'and':
                return unk_child.invert(desired & known)
            elif op == 'or':
                return unk_child.invert(desired | known)
            elif op == 'mul':
                return unk_child.invert((desired * inverse(known, N)) % N)
            elif op == 'shl':
                # NOTE(review): reuses the 'mul' formula with the raw shift
                # operand; a left shift is not invertible in general - confirm.
                return unk_child.invert((desired * inverse(known, N)) % N)
            else:
                print(op)

    def transpile(self, indent=0):
        """Return this subtree as a parenthesised C expression.

        Binary operations are emitted as ``inputs[1] <op> inputs[0]``.
        """
        code = "(\n"
        indent += 4
        code += ' ' * indent
        instr = self.main_instr
        inputs = self.inputs
        if len(inputs) == 0:
            code += instr.transpile()
        elif len(inputs) == 1:
            code += instr.transpile() + inputs[0].transpile(indent)
        else:
            code += inputs[1].transpile(indent)
            code += instr.transpile()
            code += inputs[0].transpile(indent)
        code += '\n'
        indent -= 4
        code += ' ' * indent
        code += ")"
        return code

    def z3(self, solver, memory):
        """Add the constraints of this subtree to *solver*.

        *memory* is the list of 32-bit z3 values indexed by memory cell.
        Returns the z3 expression standing for this child's output.
        Unhandled operations are printed and left unconstrained.
        """
        instr = self.main_instr
        op = instr.name
        inputs = self.inputs
        output = BitVec(f'{self.pod.index}_{self.index}', 32)
        if len(inputs) == 0:
            if op == 'push_m':
                addr = instr.operands[0].index
                mem = memory[addr]
                solver.add(mem == output)
            elif op == 'push32_i_i':
                val = (instr.operands[1].value << 0x10) | instr.operands[0].value
                output = BitVecVal(val, 32)
            else:
                print(op)
        elif len(inputs) == 1:
            inp = inputs[0]
            value = inp.z3(solver, memory)
            if op == "pop_m":
                addr = instr.operands[0].index
                mem = memory[addr]
                solver.add(mem == value)
                solver.add(mem == output)
            elif op == "not":
                solver.add(output == ~value)
            else:
                print(op)
        else:
            value0 = inputs[0].z3(solver, memory)
            value1 = inputs[1].z3(solver, memory)
            if op == "xor":
                solver.add(output == (value0 ^ value1))
            elif op == "or":
                solver.add(output == (value0 | value1))
            elif op == "and":
                solver.add(output == (value0 & value1))
            elif op == "add":
                solver.add(output == (value0 + value1))
            elif op == "sub":
                solver.add(output == (value1 - value0))
            elif op == "mul":
                solver.add(output == (value1 * value0))
            elif op == "shl":
                solver.add(output == (value1 << value0))
            elif op == "shr":
                # z3's ``>>`` is an arithmetic shift; the values are treated
                # as unsigned 32-bit words (the C transpilation uses
                # uint32_t), so a logical shift is required.
                solver.add(output == LShR(value1, value0))
            else:
                print(op)
        return output
class Pod:
    """A group of child programs that together compute one output word.

    ``build_ast`` orders the children into stages according to the registers
    they read and write; the first child of the last stage is the root of the
    pod's expression tree.
    """

    def __init__(self, n_childs, offset, megalosaure, i):
        self.megalosaure = megalosaure
        self.n_childs = n_childs
        self.offset = offset
        self.childs = []
        self.code_size = 9
        self.code_base = 0x50c0
        self.stages = []
        self.index = i
        # Child.__init__ reads the attributes set above.
        for child_index in range(n_childs):
            self.childs.append(Child(self, child_index))

    def build_ast(self):
        """Schedule the children stage by stage and link them together.

        A child joins the current stage once all its dependencies have been
        consumed; children still blocked are retried in the next stage.

        Returns:
            The list of stages (also stored in ``self.stages``).

        Raises:
            RuntimeError: if a stage schedules nothing while children are
                still blocked; no later stage could make progress, so the
                loop would otherwise never terminate.
        """
        locks = [(True, None)] * 0x26c9
        pending = list(self.childs)
        while True:
            current_stage = []
            blocked = []
            for child in pending:
                child.consume_locks(locks)
                if child.is_schedulable(locks):
                    current_stage.append(child)
                    child.mark_unlocks(locks)
                else:
                    blocked.append(child)
            if blocked and not current_stage:
                raise RuntimeError(
                    f"pod {self.index}: {len(blocked)} children can never be scheduled"
                )
            self.stages.append(current_stage)
            if not blocked:
                break
            pending = blocked
        return self.stages

    def transpile(self):
        """Return the C statement defining this pod's output ``m<index + 2>``."""
        root = self.stages[-1][0]
        return f"uint32_t m{self.index + 2} = {root.transpile()};\n"

    def z3(self, solver, memory):
        """Add the constraints of the pod's root expression to *solver*."""
        self.stages[-1][0].z3(solver, memory)
class Megalosaure:
    """Loader and lifter for the VM embedded in the ``megalosaure`` binary.

    Reads the 0x2c pod descriptors from the ELF, builds each pod's expression
    tree and can emit it either as C source or as z3 constraints.
    """

    name = "megalosaure"
    address_size = 2
    default_int_size = 4
    instr_aligment = 2
    max_instr_length = 12

    def __init__(self, path):
        """Open the ELF at *path* and disassemble every pod."""
        self.elf = ELF(path)
        print('[+] Loading virtual machines')
        self.podinfo_addresses = 0x444220
        self.pods = []
        for i in range(0x2c):
            # Each descriptor is two little-endian u32: child count, offset.
            podinfo_b = self.elf.read(self.podinfo_addresses + i * 8, 8)
            n_childs, offset = struct.unpack('<LL', podinfo_b)
            self.pods.append(Pod(n_childs, offset, self, i))

    def build_ast(self):
        """Build the expression tree of every pod."""
        print('[+] Lifting AST')
        for pod in self.pods:
            pod.build_ast()

    def transpile(self, path, depth):
        """Write a C function running the first *depth* pods to *path*.

        The generated ``megalosaure(m0, m1)`` returns ``m<depth + 1>`` in the
        high 32 bits and ``m<depth>`` in the low 32 bits.

        Returns:
            The generated C source.
        """
        print('[+] Transpiling to C')
        parts = ["#include <stdint.h>\nuint64_t megalosaure(uint32_t m0, uint32_t m1) {\n"]
        parts.extend(self.pods[i].transpile() for i in range(depth))
        parts.append(f"return (((uint64_t)m{depth + 1}) << 32) | m{depth};\n")
        parts.append("}\n")
        code = "".join(parts)
        with open(path, 'w') as f:
            f.write(code)
        # Report success only once the file has actually been written.
        print(f'[+] Transpiled to {path}')
        return code

    def z3(self, depth, desired):
        """Try to solve for the inputs producing *desired* after *depth* pods.

        ``m0`` is pinned to 0x43534346 (the bytes "FCSC"). The 64-bit
        *desired* value is split over the output cells ``m<depth>`` (low
        half) and ``m<depth + 1>`` (high half), the same cells that
        ``transpile`` returns.

        Returns:
            The z3 model when the constraints are satisfiable, else None.
        """
        print('[+] Building z3 solver')
        memory = [BitVec(f'm{i}', 32) for i in range(0x2e)]
        memory[0] = BitVecVal(0x43534346, 32)
        solver = Solver()
        for i in range(depth):
            self.pods[i].z3(solver, memory)
        solver.add(memory[depth] == desired % (2**32))
        solver.add(memory[depth + 1] == desired >> 32)
        print('[+] Running solver')
        result = solver.check()
        print(result)
        if result != sat:
            # model() is only defined after a satisfiable check.
            return None
        model = solver.model()
        print(model)
        return model
# Script entry point: lift the VM found in ./megalosaure and write its C
# transpilation to ./megalosaure.c. Guarded so that importing this module
# does not open the binary or write any file.
if __name__ == "__main__":
    meg = Megalosaure('./megalosaure')
    meg.build_ast()
    # Optional z3 attempt (all 0x2c pods against one reference value):
    # meg.z3(0x2c, 0x9b07e7ce91a8a7b5)
    meg.transpile('./megalosaure.c', 0x2c)

@ -1,12 +0,0 @@
#!/usr/bin/env python3
from Crypto.Util.number import inverse
import struct
import os
# All arithmetic is done modulo 2**32.
N = 1 << 32


def reverse(desired_out, mult):
    """Undo a multiplication by *mult* modulo N.

    Returns ``desired_out`` times the modular inverse of *mult*, reduced
    modulo N (presumably *mult* must be odd for the inverse to exist).
    """
    mult_inv = inverse(mult, N)
    return (desired_out * mult_inv) % N


# Recover the 32-bit word that gives 0xa4e1a60a once multiplied by 0x1337,
# and print it as little-endian bytes.
first = reverse(0xa4e1a60a, 0x1337)
print(struct.pack('<L', first))

@ -1,88 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
/* Transpiled VM (megalosaure.c): one run of all the pods on a pair of
 * 32-bit input words, returning the 64-bit output. */
uint64_t megalosaure(uint32_t m0, uint32_t m1);

/* Index of the block handled by this process: 1 in the root process,
 * incremented in each forked child (up to 8). */
int block = 1;

/*
 * Brute-force blocks 1..8 in parallel, one process per block.
 *
 * Each process enumerates 8-character candidates over the charset, xors them
 * with the previous block's reference output ref[block - 1] and looks for the
 * candidate whose VM output equals ref[block]. The block is printed when
 * found. Returns 0 when the block was found, 1 otherwise.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    /* 16 hex digits, then '}' (index 16) and the string's NUL terminator
     * (index 17); the last two are only enabled for block 8. */
    char charset[18] = "0123456789abcdef}";
    const uint64_t ref[9] = { 0x9b07e7ce91a8a7b5, 0x9e819eac35e7e97c, 0xfd401d3317aa6b5f, 0xdf16a32fbd9d5587, 0x80c561ac0dab4fae, 0x9237d1ddd368e209, 0x07ebe4f6ee26882c, 0xb72ffd11e878303b, 0x99d2a7dc8267bf3f };
    int charset_len = 16;
    pid_t pid = 0;

    /* Build a chain of 8 processes: every child forks the next one and takes
     * the next block; a parent (pid != 0) stops forking. */
    for (int i = 0; i < 7; ++i)
    {
        if (pid == 0)
        {
            pid = fork();
            if (pid == 0)
                block++;
            else if (pid < 0)
                perror("fork");
        }
    }

    /* A union gives well-defined access to the candidate both as bytes and
     * as the two 32-bit words passed to the VM (no pointer-cast aliasing). */
    union
    {
        char bytes[8];
        uint32_t words[2];
    } cand = { { '0', '0', '0', '0', '0', '0', '0', '0' } };

    uint64_t res;
    uint64_t x_int = ref[block - 1];
    const char *x = (const char *)&x_int;

    if (block == 8)
    {
        charset_len += 2;
    }

    for (int i0 = 0; i0 < charset_len; ++i0)
    {
        cand.bytes[0] = charset[i0] ^ x[0];
        for (int i1 = 0; i1 < charset_len; ++i1)
        {
            cand.bytes[1] = charset[i1] ^ x[1];
            for (int i2 = 0; i2 < charset_len; ++i2)
            {
                cand.bytes[2] = charset[i2] ^ x[2];
                for (int i3 = 0; i3 < charset_len; ++i3)
                {
                    cand.bytes[3] = charset[i3] ^ x[3];
                    for (int i4 = 0; i4 < charset_len; ++i4)
                    {
                        cand.bytes[4] = charset[i4] ^ x[4];
                        for (int i5 = 0; i5 < charset_len; ++i5)
                        {
                            cand.bytes[5] = charset[i5] ^ x[5];
                            for (int i6 = 0; i6 < charset_len; ++i6)
                            {
                                cand.bytes[6] = charset[i6] ^ x[6];
                                for (int i7 = 0; i7 < charset_len; ++i7)
                                {
                                    cand.bytes[7] = charset[i7] ^ x[7];
                                    res = megalosaure(cand.words[0], cand.words[1]);
                                    if (res == ref[block])
                                    {
                                        /* Undo the xor to get the plain text. */
                                        char flag[9];
                                        for (int k = 0; k < 8; ++k)
                                            flag[k] = cand.bytes[k] ^ x[k];
                                        flag[8] = 0;
                                        printf("Block %d: %s\n", block, flag);
                                        if (pid > 0)
                                            waitpid(pid, NULL, 0);
                                        return 0;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /* Not found: still wait for the child so the chain is not orphaned. */
    if (pid > 0)
        waitpid(pid, NULL, 0);
    return 1;
}

@ -1,210 +0,0 @@
import struct
from binaryninja import *
def tI(e):
    """Build an instruction (mnemonic) token showing the text *e*."""
    return InstructionTextToken(InstructionTextTokenType.InstructionToken, e)


def tt(e):
    """Build a plain-text token showing *e*."""
    return InstructionTextToken(InstructionTextTokenType.TextToken, e)


def tr(e):
    """Build a register token showing *e*."""
    return InstructionTextToken(InstructionTextTokenType.RegisterToken, e)


def ti(e):
    """Build an integer token; *e* is hexadecimal text and also supplies the value."""
    return InstructionTextToken(InstructionTextTokenType.IntegerToken, e, int(e, 16))


def ta(e):
    """Build a possible-address token; *e* is hexadecimal text and also supplies the value."""
    return InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, e, int(e, 16))


def ts(e):
    """Build an operand-separator token showing *e*."""
    return InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, e)
# Mnemonics in opcode order: the position in this tuple is the opcode number.
# The parts after the first "_" describe the operands that follow the opcode
# word ("i": signed 16-bit immediate, "m": 16-bit memory index, "f": a list
# of register indices whose length is the preceding immediate).
_MNEMONICS = (
    "pushread_i_f",  # 0x00
    "push_m",        # 0x01
    "push32_i_i",    # 0x02
    "popwrite_i_f",  # 0x03
    "pop_m",         # 0x04
    "dup",           # 0x05
    "add",           # 0x06
    "sub",           # 0x07
    "mul",           # 0x08
    "mod",           # 0x09
    "xor",           # 0x0a
    "and",           # 0x0b
    "or",            # 0x0c
    "shr",           # 0x0d
    "shl",           # 0x0e
    "not",           # 0x0f
    "neg",           # 0x10
    "nop",           # 0x11
    "exit",          # 0x12
)
opcodes = dict(enumerate(_MNEMONICS))

# Architecture-level registers exposed to the disassembler framework.
registers = ["ip", "sp"]
class Stream:
    """Sequential little-endian integer reader over an in-memory buffer.

    Every ``read_*`` method returns the decoded integer and advances the
    cursor, or returns ``None`` (cursor untouched) when fewer bytes remain
    than the requested width.
    """

    pos: int  # offset of the next unread byte
    buf: bytes

    def __init__(self, buf) -> None:
        self.pos = 0
        self.buf = buf

    def _read(self, fmt: str, size: int) -> Optional[int]:
        """Decode *size* bytes at the cursor with struct format *fmt*."""
        try:
            (val,) = struct.unpack(fmt, self.buf[self.pos:self.pos + size])
        except struct.error:
            # Only a short read is an expected failure; anything else
            # (e.g. a non-bytes buffer) is a real bug and should surface.
            return None
        self.pos += size
        return val

    def read_u16(self) -> Optional[int]:
        """Read an unsigned 16-bit integer."""
        return self._read("<H", 2)

    def read_i16(self) -> Optional[int]:
        """Read a signed 16-bit integer."""
        return self._read("<h", 2)

    def read_u32(self) -> Optional[int]:
        """Read an unsigned 32-bit integer."""
        return self._read("<I", 4)

    def read_i32(self) -> Optional[int]:
        """Read a signed 32-bit integer."""
        return self._read("<i", 4)
class Operand:
    """Common base type for every decoded instruction operand."""
class Register(Operand):
    """Operand that designates a register by index; shown as ``r<hex index>``."""

    index: int
    name: str

    def __init__(self, index: int) -> None:
        self.index = index
        self.name = "r" + hex(index)
class Mem(Operand):
    """Operand that designates a memory cell by index; shown as ``m[<hex index>]``."""

    index: int
    name: str

    def __init__(self, index: int) -> None:
        self.index = index
        self.name = "m[{}]".format(hex(index))
class Imm(Operand):
    """Immediate operand.

    ``value`` is the signed integer as decoded; ``raw`` is the same 32-bit
    pattern reinterpreted as unsigned.
    """

    value: int
    raw: int  # the original annotation named a non-existent `unsigned` attribute

    def __init__(self, value: int) -> None:
        self.value = value
        # Pack as signed 32-bit, unpack as unsigned 32-bit: two's-complement view.
        self.raw = struct.unpack("<I", struct.pack("<i", self.value))[0]
class Instruction:
    """One decoded instruction: opcode, operands, address and display name.

    ``size`` is in bytes: one 16-bit opcode word plus one 16-bit word per
    operand.
    """

    opcode: int
    operands: List[Operand]
    addr: int
    name: str
    size: int

    def __init__(self, opcode, operands, addr, opcodes) -> None:
        self.opcode = opcode
        self.operands = operands
        self.addr = addr
        self.name = opcodes[self.opcode] if self.opcode in opcodes else "invalid"
        self.size = 2 * (len(operands) + 1)

    @staticmethod
    def disassemble(data: bytes, addr: int, opcodes = opcodes, registers = registers) -> 'Instruction':
        """Decode the instruction at the start of *data*.

        The operand layout comes from the mnemonic: every ``_``-separated
        part after the first is ``i`` (signed 16-bit immediate), ``m``
        (16-bit memory index) or ``f`` (as many 16-bit register indices as
        the value of the preceding immediate).

        :param data: raw bytes starting at the instruction.
        :param addr: address of the instruction.
        :param opcodes: opcode-number to mnemonic table.
        :param registers: unused; kept for interface compatibility.
        :return: the decoded instruction, or an ``"invalid"`` one when the
            opcode is unknown or *data* ends before all operands are read.
        """
        stream = Stream(data)
        opcode = stream.read_u16()
        if opcode not in opcodes:
            return Instruction(opcode, [], addr, opcodes)

        # Returned when an operand read runs past the end of `data`; the
        # original crashed there by building Imm(None) / Register(None).
        truncated = Instruction(None, [], addr, opcodes)

        operands = []
        for element in opcodes[opcode].split('_')[1:]:
            if element == 'i':
                value = stream.read_i16()
                if value is None:
                    return truncated
                operands.append(Imm(value))
            elif element == 'm':
                value = stream.read_u16()
                if value is None:
                    return truncated
                operands.append(Mem(value))
            elif element == 'f':
                # The register count is the immediate decoded just before.
                for _ in range(operands[-1].value):
                    index = stream.read_u16()
                    if index is None:
                        return truncated
                    operands.append(Register(index))
        return Instruction(opcode, operands, addr, opcodes)

    def to_tokens(self) -> List[InstructionTextToken]:
        """Render the instruction as display tokens: mnemonic, then operands.

        Immediates are shown as ``<decimal> (<hex of raw>)``.
        """
        tokens = [tI(self.name)]
        for op in self.operands:
            tokens.append(tt(" "))
            if isinstance(op, (Register, Mem)):
                tokens.append(tr(op.name))
            elif isinstance(op, Imm):
                # Build the decimal token directly: the `ti` helper parses its
                # text as hexadecimal, which gave the token a wrong value.
                tokens.append(InstructionTextToken(
                    InstructionTextTokenType.IntegerToken, str(op.value), op.value))
                tokens.append(tt(" ("))
                tokens.append(ti(hex(op.raw)))
                tokens.append(tt(")"))
        return tokens

    def info(self) -> InstructionInfo:
        """Return length and branch information for the analysis framework."""
        info = InstructionInfo()
        info.length = self.size
        if "exit" in self.name or "stop" in self.name or self.name == "ret":
            info.add_branch(BranchType.FunctionReturn)
        return info
class Megalosaure(Architecture):
    """Disassembly-only architecture plugin for the "megalosaure" bytecode.

    Decoding is delegated to ``Instruction.disassemble``; no low-level IL
    is produced.
    """

    name = "megalosaure"
    address_size = 2
    default_int_size = 4
    # Was spelled `instr_aligment`, so the value never reached the framework.
    instr_alignment = 2
    max_instr_length = 12
    regs = {reg: RegisterInfo(reg, 4) for reg in registers}
    stack_pointer = "sp"

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at *addr*."""
        instruction = Instruction.disassemble(data, addr)
        return instruction.to_tokens(), instruction.size

    def get_instruction_info(self, data, addr):
        """Return the ``InstructionInfo`` for the instruction at *addr*."""
        instruction = Instruction.disassemble(data, addr)
        return instruction.info()

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns ``None``."""
        return None
# Register the architecture, then look it up again by the name it declares.
Megalosaure.register()
arch = Architecture["megalosaure"]

@ -1,29 +0,0 @@
#include <stdint.h>
#include <stdio.h>
/* Transpiled virtual-machine function, provided by a separate source file. */
uint64_t megalosaure(uint32_t m0, uint32_t m1);

/*
 * Brute-force the last three bytes of the 8-byte input "FCSC{???".
 *
 * Every combination of byte values 30..126 is tried for positions 5..7; the
 * first input for which megalosaure() returns 0x9b07e7ce91a8a7b5 is printed
 * and the program returns 0.
 */
int main(int argc, char **argv)
{
    /* A union gives well-defined access to the same 8 bytes as characters
     * and as two 32-bit words (the original cast char* to uint32_t*). */
    union { char c[8]; uint32_t w[2]; } msg = { {'F', 'C', 'S', 'C', '{', '0', '0', '0'} };

    for (int i = 30; i < 127; ++i)
    {
        msg.c[5] = i;
        for (int j = 30; j < 127; ++j)
        {
            msg.c[6] = j;
            for (int k = 30; k < 127; ++k)
            {
                msg.c[7] = k;
                uint64_t res = megalosaure(msg.w[0], msg.w[1]);
                if (res == 0x9b07e7ce91a8a7b5)
                {
                    /* The buffer has no NUL terminator, so bound the print
                     * to its 8 bytes ("%s" read past the array). */
                    printf("%.8s\n", msg.c);
                    return 0;
                }
            }
        }
    }
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 214 KiB

@ -1,228 +0,0 @@
#!/usr/bin/env python3
import struct
from typing import Optional
import copy
# Friendly names for the addresses the relocation program reads and writes.
# The sixteen "registers" are laid out 0x18 bytes apart: r<i> starts at
# 0x42088 and r<i>.size at 0x42090.
regs = {
    0x42048: "DT_RELA",
    0x42068: "DT_RELASZ",
}
regs.update({0x42088 + 0x18 * i: f"r{i}" for i in range(16)})
regs.update({0x42090 + 0x18 * i: f"r{i}.size" for i in range(16)})
class Stream:
    """Sequential little-endian unsigned-integer reader over a buffer.

    Every ``read_*`` method returns the decoded integer and advances the
    cursor, or returns ``None`` (cursor untouched) when fewer bytes remain
    than the requested width.
    """

    pos: int  # offset of the next unread byte
    buf: bytes

    def __init__(self, buf) -> None:
        self.pos = 0
        self.buf = buf

    def _read(self, fmt: str, size: int) -> Optional[int]:
        """Decode *size* bytes at the cursor with struct format *fmt*."""
        try:
            (val,) = struct.unpack(fmt, self.buf[self.pos:self.pos + size])
        except struct.error:
            # Only a short read is an expected failure; anything else
            # (e.g. a non-bytes buffer) is a real bug and should surface.
            return None
        self.pos += size
        return val

    def read_u8(self) -> Optional[int]:
        """Read an unsigned 8-bit integer."""
        return self._read("<B", 1)

    def read_u16(self) -> Optional[int]:
        """Read an unsigned 16-bit integer."""
        return self._read("<H", 2)

    def read_u32(self) -> Optional[int]:
        """Read an unsigned 32-bit integer."""
        return self._read("<I", 4)

    def read_u64(self) -> Optional[int]:
        """Read an unsigned 64-bit integer."""
        return self._read("<Q", 8)

    def is_done(self):
        """Return True once the cursor has reached the end of the buffer."""
        return self.pos >= len(self.buf)
class RelaEnt:
    """One relocation record, read field by field from a stream.

    Fields, in read order: ``addr`` (u64), ``type`` (u32), ``symbol`` (u32)
    and ``addend`` (u64) - 0x18 bytes in total.
    """

    def __init__(self, stream):
        read64, read32 = stream.read_u64, stream.read_u32
        self.addr = read64()
        self.type = read32()
        self.symbol = read32()
        self.addend = read64()
class Rela:
    """Cursor over a relocation table stored inside an ``Elf`` image.

    Entries are decoded from the image at the moment they are requested, so
    bytes patched into the table earlier are seen by later reads.
    """

    def __init__(self, rela_addr, rela_size, elf):
        self.rela_addr = rela_addr
        self.rela_size = rela_size
        self.elf = elf

    def peek(self):
        """Return the next entry without consuming it, or None when exhausted."""
        if self.rela_size <= 0:
            return None
        raw = self.elf.get_data(self.rela_addr, 0x18)
        entry = RelaEnt(Stream(raw))
        # Remember where the record itself lives.
        entry.offset = self.rela_addr
        return entry

    def pop(self):
        """Return the next entry and step past it, or None when exhausted."""
        entry = self.peek()
        if entry is not None:
            self.rela_addr += 0x18
            self.rela_size -= 0x18
        return entry
class Symbol:
    """One 0x18-byte symbol-table record in the Elf64_Sym layout.

    Fields: ``name`` (u32), ``info`` (u8), ``other`` (u8), ``shndx`` (u16),
    ``value`` (u64) and ``size`` (u64).
    """

    def __init__(self, data):
        stream = Stream(data)
        # st_name is 32 bits wide and st_shndx 16 bits; the original read
        # them as 16 and 32 bits, scrambling name/info/other/shndx (the
        # header is 8 bytes either way, so value and size were unaffected).
        self.name = stream.read_u32()
        self.info = stream.read_u8()
        self.other = stream.read_u8()
        self.shndx = stream.read_u16()
        self.value = stream.read_u64()
        self.size = stream.read_u64()
class Elf:
    """Patchable in-memory copy of the target file plus a relocation interpreter.

    Addresses are converted to offsets in the file copy by subtracting
    0x40000.
    """

    def __init__(self, path):
        with open(path, 'rb') as f:
            self._data = bytearray(f.read())
        # Fixed addresses of the table pointer, table size and symbol table.
        self.dynt_rela = 0x42048
        self.dynt_relasz = 0x42068
        self.symtab = 0x42080
        self.breakpoints = {}

    def dump(self, path):
        """Write the current (patched) image to *path*."""
        with open(path, 'wb') as f:
            f.write(self._data)

    def get_data(self, addr, size):
        """Return *size* bytes of the image starting at address *addr*."""
        addr -= 0x40000
        return self._data[addr:addr + size]

    def set_data(self, addr, data):
        """Overwrite the image at address *addr* with *data*."""
        addr -= 0x40000
        self._data[addr:addr + len(data)] = data

    def set_flag(self, flag):
        """Store *flag* at address 0x5a100 of the image."""
        addr = 0x5a100
        addr -= 0x40000
        self._data[addr:addr + len(flag)] = flag

    def get_rela(self):
        """Return a ``Rela`` cursor built from the current table address and size."""
        rela_addr = Stream(self.get_data(self.dynt_rela, 8)).read_u64()
        rela_size = Stream(self.get_data(self.dynt_relasz, 8)).read_u64()
        return Rela(rela_addr, rela_size, self)

    def get_symbol(self, i):
        """Decode symbol number *i* from the symbol table."""
        sym_data = self.get_data(self.symtab + 0x18 * i, 0x18)
        return Symbol(sym_data)

    def apply_relocs(self):
        """Execute one full pass over the relocation table.

        Each entry is applied to the image and rendered as one line of
        pseudo-assembly. Supported entry types: 1 (symbol value + addend),
        5 (copy ``symbol.size`` bytes from the symbol's address) and
        8 (store the addend).

        :return: ``(asm, patched_code)`` - the listing for this pass and
            whether any entry wrote into the 0x41000..0x41082 range.
        :raises RuntimeError: on an unsupported entry type, or a type-5
            entry with a non-zero addend.
        """
        rela = self.get_rela()
        patched_code = False
        lines = []
        while True:
            entry = rela.pop()
            if entry is None:
                break
            symbol = self.get_symbol(entry.symbol)
            dst_name = regs[entry.addr] if entry.addr in regs else f'[{hex(entry.addr)}]'
            line = f'{hex(entry.offset)}: '
            if entry.type == 1:
                data = struct.pack('<Q', (symbol.value + entry.addend) % 2**64)
                line += f'add {dst_name}, r{entry.symbol}, ${hex(entry.addend)}'
            elif entry.type == 5:
                # Explicit error instead of a bare `raise`, which itself
                # fails with "No active exception to re-raise".
                if entry.addend != 0:
                    raise RuntimeError(
                        f'copy relocation at {hex(entry.offset)} has non-zero addend {hex(entry.addend)}')
                data = self.get_data(symbol.value + entry.addend, symbol.size)
                line += f'mov {dst_name}, [r{entry.symbol}].{symbol.size}'
            elif entry.type == 8:
                data = struct.pack('<Q', entry.addend)
                src_name = f'&{regs[entry.addend]}' if entry.addend in regs else f'${hex(entry.addend)}'
                line += f'mov {dst_name}, {src_name}'
            else:
                raise RuntimeError(
                    f'unsupported relocation type {hex(entry.type)} at {hex(entry.offset)}')
            line = line.ljust(50) + f'# {bytes(data).hex()}'
            # Flag entries that rewrite a later part of the table itself.
            if entry.offset < entry.addr < 0x49000:
                next_instr = rela.peek()
                # peek() is None after the last entry; that cannot be a patch
                # of "the next instruction", so it is reported as FAR.
                patches_next = (
                    next_instr is not None
                    and next_instr.offset <= entry.addr < next_instr.offset + 0x18
                )
                line += ' PATCHING CODE' if patches_next else ' PATCHING FAR'
            lines.append(line + '\n')
            self.set_data(entry.addr, data)
            if 0x41000 <= entry.addr < 0x41083:
                patched_code = True
        lines.append('#End of block\n')
        return ''.join(lines), patched_code
def main():
    """Run relocation passes on ./svartalfheim and print the combined listing.

    Passes repeat until eight of them have reported patched code; after each
    such pass the image is dumped to ``./dumps/pydumped<n>``.
    """
    elf = Elf('./svartalfheim')
    chunks = []
    n = 0
    while n <= 7:
        step_asm, patched_code = elf.apply_relocs()
        if patched_code:
            chunks.append('# NATIVE CODE LOADING:\n')
            n += 1
            elf.dump(f'./dumps/pydumped{n}')
        chunks.append(step_asm)
    print(''.join(chunks))


if __name__ == "__main__":
    main()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 403 KiB

@ -1,9 +0,0 @@
/*
 * Decompiler-style pseudo-C of the entry stub: it writes 0x1a228 bytes
 * starting at __elf_header to a file named by `path`, then executes that file.
 * The `name {number}` pairs are the syscall names with their numbers.
 */
int64_t _start()
{
/* 8-byte value initialised from '_'; its address is passed as the path
 * (presumably read as the string "_" on a little-endian target). */
int64_t path = '_';
/* Remove any file already at that path. */
syscall(sys_unlink {0x57}, &path);
/* Create the file and open it for writing. */
int32_t fd = syscall(sys_open {2}, &path, O_CREAT | O_WRONLY);
/* Write 0x1a228 bytes starting at __elf_header. */
syscall(sys_write {1}, fd, &__elf_header, 0x1a228);
syscall(sys_close {3}, fd);
/* Execute the file just written, with null argv and envp. */
syscall(sys_execve {0x3b}, &path, nullptr, nullptr);
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 502 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 75 KiB

@ -1,273 +0,0 @@
#!/usr/bin/env python3
import struct
from typing import Optional
import copy
# Friendly names for the addresses the relocation program reads and writes.
# The sixteen "registers" are laid out 0x18 bytes apart: r<i> starts at
# 0x42088 and r<i>.size at 0x42090.
regs = {
    0x42048: "DT_RELA",
    0x42068: "DT_RELASZ",
}
regs.update({0x42088 + 0x18 * i: f"r{i}" for i in range(16)})
regs.update({0x42090 + 0x18 * i: f"r{i}.size" for i in range(16)})
class Stream:
    """Sequential little-endian unsigned-integer reader over a buffer.

    Every ``read_*`` method returns the decoded integer and advances the
    cursor, or returns ``None`` (cursor untouched) when fewer bytes remain
    than the requested width.
    """

    pos: int  # offset of the next unread byte
    buf: bytes

    def __init__(self, buf) -> None:
        self.pos = 0
        self.buf = buf

    def _read(self, fmt: str, size: int) -> Optional[int]:
        """Decode *size* bytes at the cursor with struct format *fmt*."""
        try:
            (val,) = struct.unpack(fmt, self.buf[self.pos:self.pos + size])
        except struct.error:
            # Only a short read is an expected failure; anything else
            # (e.g. a non-bytes buffer) is a real bug and should surface.
            return None
        self.pos += size
        return val

    def read_u8(self) -> Optional[int]:
        """Read an unsigned 8-bit integer."""
        return self._read("<B", 1)

    def read_u16(self) -> Optional[int]:
        """Read an unsigned 16-bit integer."""
        return self._read("<H", 2)

    def read_u32(self) -> Optional[int]:
        """Read an unsigned 32-bit integer."""
        return self._read("<I", 4)

    def read_u64(self) -> Optional[int]:
        """Read an unsigned 64-bit integer."""
        return self._read("<Q", 8)

    def is_done(self):
        """Return True once the cursor has reached the end of the buffer."""
        return self.pos >= len(self.buf)
class RelaEnt:
    """One relocation record, read field by field from a stream.

    Fields, in read order: ``addr`` (u64), ``type`` (u32), ``symbol`` (u32)
    and ``addend`` (u64) - 0x18 bytes in total.
    """

    def __init__(self, stream):
        read64, read32 = stream.read_u64, stream.read_u32
        self.addr = read64()
        self.type = read32()
        self.symbol = read32()
        self.addend = read64()
class Rela:
    """Cursor over a relocation table stored inside an ``Elf`` image.

    Entries are decoded from the image at the moment they are requested, so
    bytes patched into the table earlier are seen by later reads.
    """

    def __init__(self, rela_addr, rela_size, elf):
        self.rela_addr = rela_addr
        self.rela_size = rela_size
        self.elf = elf

    def peek(self):
        """Return the next entry without consuming it, or None when exhausted."""
        if self.rela_size <= 0:
            return None
        raw = self.elf.get_data(self.rela_addr, 0x18)
        entry = RelaEnt(Stream(raw))
        # Remember where the record itself lives.
        entry.offset = self.rela_addr
        return entry

    def pop(self):
        """Return the next entry and step past it, or None when exhausted."""
        entry = self.peek()
        if entry is not None:
            self.rela_addr += 0x18
            self.rela_size -= 0x18
        return entry
class Symbol:
    """One 0x18-byte symbol-table record in the Elf64_Sym layout.

    Fields: ``name`` (u32), ``info`` (u8), ``other`` (u8), ``shndx`` (u16),
    ``value`` (u64) and ``size`` (u64).
    """

    def __init__(self, data):
        stream = Stream(data)
        # st_name is 32 bits wide and st_shndx 16 bits; the original read
        # them as 16 and 32 bits, scrambling name/info/other/shndx (the
        # header is 8 bytes either way, so value and size were unaffected).
        self.name = stream.read_u32()
        self.info = stream.read_u8()
        self.other = stream.read_u8()
        self.shndx = stream.read_u16()
        self.value = stream.read_u64()
        self.size = stream.read_u64()
class Elf:
    """Patchable in-memory copy of the target file plus a relocation interpreter.

    Addresses are converted to offsets in the file copy by subtracting
    0x40000. Callbacks can be attached to individual relocation entries
    with ``set_breakpoint``.
    """

    def __init__(self, path):
        with open(path, 'rb') as f:
            self._data = bytearray(f.read())
        # Fixed addresses of the table pointer, table size and symbol table.
        self.dynt_rela = 0x42048
        self.dynt_relasz = 0x42068
        self.symtab = 0x42080
        # entry address -> (callback, user object handed back to the callback)
        self.breakpoints = {}

    def dump(self, path):
        """Write the current (patched) image to *path*."""
        with open(path, 'wb') as f:
            f.write(self._data)

    def get_data(self, addr, size):
        """Return *size* bytes of the image starting at address *addr*."""
        addr -= 0x40000
        return self._data[addr:addr + size]

    def set_data(self, addr, data):
        """Overwrite the image at address *addr* with *data*."""
        addr -= 0x40000
        self._data[addr:addr + len(data)] = data

    def set_flag(self, flag):
        """Store *flag* at address 0x5a100 of the image."""
        addr = 0x5a100
        addr -= 0x40000
        self._data[addr:addr + len(flag)] = flag

    def get_rela(self):
        """Return a ``Rela`` cursor built from the current table address and size."""
        rela_addr = Stream(self.get_data(self.dynt_rela, 8)).read_u64()
        rela_size = Stream(self.get_data(self.dynt_relasz, 8)).read_u64()
        return Rela(rela_addr, rela_size, self)

    def get_symbol(self, i):
        """Decode symbol number *i* from the symbol table."""
        sym_data = self.get_data(self.symtab + 0x18 * i, 0x18)
        return Symbol(sym_data)

    def set_breakpoint(self, addr, func, res):
        """Call ``func(self, entry, line, data, res)`` for the entry stored at *addr*.

        The callback runs before the entry is applied; a falsy return value
        aborts the current pass.
        """
        self.breakpoints[addr] = (func, res)

    def apply_relocs(self):
        """Execute one pass over the relocation table.

        Each entry is applied to the image and rendered as one line of
        pseudo-assembly. Supported entry types: 1 (symbol value + addend),
        5 (copy ``symbol.size`` bytes from the symbol's address) and
        8 (store the addend).

        :return: ``(asm, flag)`` - the listing produced so far, and either
            True because a breakpoint callback aborted the pass, or whether
            any entry wrote into the 0x41000..0x41082 range.
        :raises RuntimeError: on an unsupported entry type, or a type-5
            entry with a non-zero addend.
        """
        rela = self.get_rela()
        patched_code = False
        lines = []
        while True:
            entry = rela.pop()
            if entry is None:
                break
            symbol = self.get_symbol(entry.symbol)
            dst_name = regs[entry.addr] if entry.addr in regs else f'[{hex(entry.addr)}]'
            line = f'{hex(entry.offset)}: '
            if entry.type == 1:
                data = struct.pack('<Q', (symbol.value + entry.addend) % 2**64)
                line += f'add {dst_name}, r{entry.symbol}, ${hex(entry.addend)}'
            elif entry.type == 5:
                # Explicit error instead of a bare `raise`, which itself
                # fails with "No active exception to re-raise".
                if entry.addend != 0:
                    raise RuntimeError(
                        f'copy relocation at {hex(entry.offset)} has non-zero addend {hex(entry.addend)}')
                data = self.get_data(symbol.value + entry.addend, symbol.size)
                line += f'mov {dst_name}, [r{entry.symbol}].{symbol.size}'
            elif entry.type == 8:
                data = struct.pack('<Q', entry.addend)
                src_name = f'&{regs[entry.addend]}' if entry.addend in regs else f'${hex(entry.addend)}'
                line += f'mov {dst_name}, {src_name}'
            else:
                raise RuntimeError(
                    f'unsupported relocation type {hex(entry.type)} at {hex(entry.offset)}')
            line = line.ljust(50) + f'# {bytes(data).hex()}'
            # Flag entries that rewrite a later part of the table itself.
            if entry.offset < entry.addr < 0x49000:
                next_instr = rela.peek()
                # peek() is None after the last entry; that cannot be a patch
                # of "the next instruction", so it is reported as FAR.
                patches_next = (
                    next_instr is not None
                    and next_instr.offset <= entry.addr < next_instr.offset + 0x18
                )
                line += ' PATCHING CODE' if patches_next else ' PATCHING FAR'
            lines.append(line + '\n')
            hook = self.breakpoints.get(entry.offset)
            if hook is not None:
                func, res = hook
                # A falsy callback result stops the pass before the write.
                if not func(self, entry, line, data, res):
                    return ''.join(lines), True
            self.set_data(entry.addr, data)
            if 0x41000 <= entry.addr < 0x41083:
                patched_code = True
        lines.append('#End of block\n')
        return ''.join(lines), patched_code
def fetch_r4(elf, entry, asm, data, res):
    """Breakpoint callback: record a hit unless *data* is a single zero byte.

    Appends ``True`` to *res* and returns True when *data* differs from
    ``b'\\x00'``; otherwise returns False and leaves *res* untouched.
    """
    hit = data != b'\x00'
    if hit:
        res.append(True)
    return hit
def check(elf, entry, asm, data, res=None):
    """Print the listing line *asm* when *data* is a single zero byte.

    *res* is accepted (and ignored) so the function matches the five-argument
    form in which breakpoint callbacks are invoked; the original four-argument
    signature would fail with TypeError when used as one.

    Always returns None.
    """
    if data == b'\x00':
        print(asm)
# Recover the flag one byte at a time by replaying relocation passes on
# copies of the image.
# NOTE(review): the nesting below was lost with the indentation; the comments
# describe the most plausible reading and must be confirmed against the
# original file.
def main():
elf = Elf('./svartalfheim')
asm = ""
n = 0
# Phase 1: run passes until four of them have reported patched code, dumping
# the image after each such pass.
while n <= 3:
#print('---')
step_asm, patched_code = elf.apply_relocs()
if patched_code:
asm += '# NATIVE CODE LOADING:\n'
n += 1
elf.dump(f'./dumps/pydumped{n}')
asm += step_asm
# Phase 2: 0x46-byte flag buffer; for each position try byte values 0x30..126.
flag = bytearray(0x46)
for i in range(0x46):
for b in range(0x30, 127):
n = 4
flag[i] = b
# Work on a deep copy so every candidate starts from the same image state.
before_read = copy.deepcopy(elf)
# fetch_r4 appends to `res` each time a breakpoint sees data other than b'\x00'.
res = []
before_read.set_breakpoint(0x47000, fetch_r4, res)
before_read.set_breakpoint(0x47270, fetch_r4, res)
before_read.set_flag(flag)
while n < 7:
step_asm, patched_code = before_read.apply_relocs()
if patched_code:
asm += '# NATIVE CODE LOADING:\n'
n += 1
# NOTE(review): this dumps `elf`, not the `before_read` copy being run - confirm intent.
elf.dump(f'./dumps/pydumped{n}')
break
asm += step_asm
# Two recorded hits per flag position up to and including i: the candidate
# is kept and the search moves on (presumably to the next position).
if len(res) == 2 * (i + 1):
print(flag)
break
#print(asm)
if __name__ == "__main__":
main()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 708 KiB