TBOX
This commit is contained in:
parent
d8021e9352
commit
d5ef468573
|
@ -1,8 +1,6 @@
|
|||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* AES-128 simple implementation template and testing */
|
||||
|
||||
/*
|
||||
Author: Ondrej Hladuvka, hladuond@fit.cvut.cz
|
||||
Template: Jiri Bucek 2017
|
||||
|
@ -35,13 +33,13 @@ const uint8_t SBOX[256] = {
|
|||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 // F
|
||||
};
|
||||
|
||||
// Round constants for the AES-128 key schedule.
// expandKey indexes this table with i / 4 for i = 4, 8, ..., 40, i.e. it reads
// entries 1..10; entry 0 and the trailing entry are never read by it.
const uint8_t rCon[12] = {
    0x8d,                          // [0]      not read by expandKey
    0x01, 0x02, 0x04, 0x08, 0x10,  // [1..5]
    0x20, 0x40, 0x80, 0x1b, 0x36,  // [6..10]
    0x00                           // [11]     padding
};
|
||||
|
||||
/* AES state type */
|
||||
typedef uint32_t t_state[4];
|
||||
|
||||
// Debug helper: writes the four state words to stdout in the stream's current
// integer format, separated by single spaces and terminated by a newline.
void print(t_state t) {
    for (int column = 0; column < 4; ++column) {
        std::cout << t[column] << (column < 3 ? ' ' : '\n');
    }
}
|
||||
|
||||
// Packs four bytes into one 32-bit word: a0 lands in bits 0-7, a1 in bits 8-15,
// a2 in bits 16-23 and a3 in bits 24-31. Every argument is cast to uint32_t
// before shifting, so the shifts never operate on a promoted (signed) int.
#define word(a0, a1, a2, a3) ((uint32_t)(a0) | ((uint32_t)(a1) << 8) | ((uint32_t)(a2) << 16) | ((uint32_t)(a3) << 24))
|
||||
|
||||
uint8_t wbyte(uint32_t w, int pos) {
|
||||
|
@ -114,27 +112,6 @@ void mixColumns(t_state s) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Key expansion from 128bits (4*32b)
|
||||
* to 11 round keys (11*4*32b)
|
||||
* each round key is 4*32b
|
||||
*/
|
||||
void expandKey(uint8_t k[16], uint32_t ek[44]) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ek[i] = word(k[4*i], k[4*i + 1], k[4*i + 2], k[4*i + 3]);
|
||||
}
|
||||
|
||||
for (int i = 4; i < 44; i++) {
|
||||
uint32_t temp = ek[i - 1];
|
||||
if (i % 4 == 0) {
|
||||
// RotWord, SubWord, and XOR with Rcon
|
||||
temp = subWord((temp >> 8) | (temp << 24)) ^ rCon[i / 4];
|
||||
}
|
||||
ek[i] = ek[i - 4] ^ temp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Adding expanded round key (prepared before) */
|
||||
void addRoundKey(t_state s, uint32_t ek[], short index) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
|
@ -142,7 +119,7 @@ void addRoundKey(t_state s, uint32_t ek[], short index) {
|
|||
}
|
||||
}
|
||||
|
||||
void aes128_5a(uint8_t *in, uint8_t *out, uint32_t * expKey) {
|
||||
void aes128_5(uint8_t *in, uint8_t *out, uint32_t * expKey) {
|
||||
t_state state;
|
||||
|
||||
state[0] = word(in[0], in[1], in[2], in[3]);
|
||||
|
@ -157,7 +134,6 @@ void aes128_5a(uint8_t *in, uint8_t *out, uint32_t * expKey) {
|
|||
mixColumns(state);
|
||||
addRoundKey(state, expKey, round * 4 + 4);
|
||||
}
|
||||
|
||||
subBytes(state);
|
||||
shiftRows(state);
|
||||
addRoundKey(state, expKey, 10 * 4);
|
||||
|
|
|
@ -0,0 +1,165 @@
|
|||
#include <stdio.h>
#include <stdint.h>

#include <array>
#include <iostream>
|
||||
|
||||
/*
|
||||
Author: Ondrej Hladuvka, hladuond@fit.cvut.cz
|
||||
Template: Jiri Bucek 2017
|
||||
AES specification:
|
||||
http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
|
||||
*/
|
||||
|
||||
namespace aes128_5b {

/*
 * AES-128 block encryption using T-boxes.
 *
 * The state is held as four 32-bit words, one per column, with row 0 of the
 * column in the least significant byte. SubBytes, ShiftRows and MixColumns of
 * a full round are fused into four 256-entry lookup tables (T0..T3) that are
 * computed at compile time.
 */

/* AES Constants */

// Forward S-box: SBOX[x] is the SubBytes substitution of byte x.
constexpr std::array<uint8_t, 256> SBOX = {
//  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, // 0
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, // 1
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, // 2
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, // 3
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, // 4
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, // 5
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, // 6
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, // 7
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, // 8
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, // 9
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, // A
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, // B
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, // C
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, // D
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, // E
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16  // F
};

/* AES state type: four 32-bit words, one per state column. */
typedef uint32_t t_state[4];

// Debug helper: prints the four state words on one line, space separated.
void print(t_state t) {
    std::cout << t[0] << ' ' << t[1] << ' ' << t[2] << ' ' << t[3] << '\n';
}

// Packs four bytes into a 32-bit word; a0 becomes the least significant byte.
// NOTE: macros ignore namespaces. The spelling below is deliberately left
// untouched so that it stays token-identical to any other definition of
// `word` pulled into the same translation unit.
#define word(a0, a1, a2, a3) ((uint32_t)(a0) | ((uint32_t)(a1) << 8) | ((uint32_t)(a2) << 16) | ((uint32_t)(a3) << 24))

// Returns byte `pos` (0 = least significant, 3 = most significant) of `w`.
uint8_t wbyte(uint32_t w, int pos) {
    return (w >> (pos * 8)) & 0xff;
}

// **************** AES functions ****************

// Multiplies `c` by x (i.e. by 0x02) in GF(2^8): shift left by one and, when
// the top bit was set, reduce by XORing in 0x1B.
constexpr uint8_t xtime(uint8_t c) {
    const uint8_t reduction = (c & 0x80) ? 0x1B : 0x00;
    return static_cast<uint8_t>((c << 1) ^ reduction);
}

// Multiplies `x` by `y` in GF(2^8) using shift-and-add over the bits of `y`.
constexpr uint8_t multiply(uint8_t x, uint8_t y) {
    uint8_t result = 0;
    while (y) {
        if (y & 1) result ^= x;
        x = xtime(x);
        y >>= 1;
    }
    return result;
}

// Builds one T-box at compile time.
//
// For every input byte the base entry is the column (02*s, 01*s, 01*s, 03*s)
// with s = SBOX[input], packed with row 0 in the low byte. `rotation` is the
// number of byte positions (0..3) that column is rotated towards the most
// significant byte, which yields the tables for state rows 0..3.
constexpr std::array<uint32_t, 256> makeTBox(int rotation) {
    std::array<uint32_t, 256> box{};
    const int shift = 8 * rotation;
    for (int i = 0; i < 256; i++) {
        const uint8_t s = SBOX[i];
        const uint32_t column = word(multiply(s, 0x02), s, s, multiply(s, 0x03));
        // A rotation of 0 must not reach the expression with `>> 32`.
        box[i] = (shift == 0) ? column : ((column << shift) | (column >> (32 - shift)));
    }
    return box;
}

// T-boxes, one per state row. They are compile-time constants, so no runtime
// initialisation (and no initialisation flag) is needed before encrypting.
constexpr std::array<uint32_t, 256> T0 = makeTBox(0);  // (2s,  s,  s, 3s)
constexpr std::array<uint32_t, 256> T1 = makeTBox(1);  // (3s, 2s,  s,  s)
constexpr std::array<uint32_t, 256> T2 = makeTBox(2);  // ( s, 3s, 2s,  s)
constexpr std::array<uint32_t, 256> T3 = makeTBox(3);  // ( s,  s, 3s, 2s)

// Retained for source compatibility only: the tables used to be filled in at
// run time on first use. `t` is no longer consulted by aes128_5 and
// generateTBoxes() has nothing left to do.
bool t = true;
constexpr void generateTBoxes() {}

// XORs the four round-key words starting at ek[index] into the state.
void addRoundKey(t_state s, uint32_t ek[], short index) {
    for (int i = 0; i < 4; i++) {
        s[i] ^= ek[index + i];
    }
}

// One full round without the key addition: SubBytes, ShiftRows and MixColumns
// fused into four table lookups per output column. Output column j takes row r
// from input column (j + r) mod 4, which is exactly the ShiftRows permutation.
void aesRound(uint32_t state[4]) {
    uint32_t tmp[4];
    tmp[0] = T0[wbyte(state[0], 0)] ^ T1[wbyte(state[1], 1)] ^ T2[wbyte(state[2], 2)] ^ T3[wbyte(state[3], 3)];
    tmp[1] = T0[wbyte(state[1], 0)] ^ T1[wbyte(state[2], 1)] ^ T2[wbyte(state[3], 2)] ^ T3[wbyte(state[0], 3)];
    tmp[2] = T0[wbyte(state[2], 0)] ^ T1[wbyte(state[3], 1)] ^ T2[wbyte(state[0], 2)] ^ T3[wbyte(state[1], 3)];
    tmp[3] = T0[wbyte(state[3], 0)] ^ T1[wbyte(state[0], 1)] ^ T2[wbyte(state[1], 2)] ^ T3[wbyte(state[2], 3)];
    for (int i = 0; i < 4; i++) state[i] = tmp[i];
}

// Stand-alone ShiftRows: row r (byte r of every column) is rotated left by r
// columns. Not used by aes128_5 itself, which gets the same permutation from
// the lookup indices in aesRound / aesFinalRound.
void shiftRows(t_state s) {
    const uint32_t c0 = s[0], c1 = s[1], c2 = s[2], c3 = s[3];

    s[0] = (c0 & 0x000000FF) | (c1 & 0x0000FF00) | (c2 & 0x00FF0000) | (c3 & 0xFF000000);
    s[1] = (c1 & 0x000000FF) | (c2 & 0x0000FF00) | (c3 & 0x00FF0000) | (c0 & 0xFF000000);
    s[2] = (c2 & 0x000000FF) | (c3 & 0x0000FF00) | (c0 & 0x00FF0000) | (c1 & 0xFF000000);
    s[3] = (c3 & 0x000000FF) | (c0 & 0x0000FF00) | (c1 & 0x00FF0000) | (c2 & 0xFF000000);
}

// Final round: SubBytes and ShiftRows (no MixColumns), then XOR with the last
// round key, i.e. the four words starting at expKey[40].
void aesFinalRound(uint32_t state[4], uint32_t * expKey) {
    t_state tmp;
    tmp[0] = word(SBOX[wbyte(state[0], 0)], SBOX[wbyte(state[1], 1)], SBOX[wbyte(state[2], 2)], SBOX[wbyte(state[3], 3)]);
    tmp[1] = word(SBOX[wbyte(state[1], 0)], SBOX[wbyte(state[2], 1)], SBOX[wbyte(state[3], 2)], SBOX[wbyte(state[0], 3)]);
    tmp[2] = word(SBOX[wbyte(state[2], 0)], SBOX[wbyte(state[3], 1)], SBOX[wbyte(state[0], 2)], SBOX[wbyte(state[1], 3)]);
    tmp[3] = word(SBOX[wbyte(state[3], 0)], SBOX[wbyte(state[0], 1)], SBOX[wbyte(state[1], 2)], SBOX[wbyte(state[2], 3)]);
    for (int i = 0; i < 4; ++i)
        state[i] = tmp[i] ^ expKey[10 * 4 + i];
}

// Encrypts one 16-byte block.
//
// in     - 16 input bytes
// out    - receives the 16 output bytes; may be the same buffer as `in`,
//          because the input is fully loaded before anything is written
// expKey - expanded key: 44 words, round key r at expKey[4*r .. 4*r + 3]
void aes128_5(uint8_t *in, uint8_t *out, uint32_t * expKey) {
    uint32_t state[4] = {
        word(in[0], in[1], in[2], in[3]),
        word(in[4], in[5], in[6], in[7]),
        word(in[8], in[9], in[10], in[11]),
        word(in[12], in[13], in[14], in[15])
    };

    // Initial whitening with round key 0.
    addRoundKey(state, expKey, 0);

    // Rounds 1..9: table-driven round followed by that round's key.
    for (int round = 1; round <= 9; round++) {
        aesRound(state);
        addRoundKey(state, expKey, round * 4);
    }

    // Round 10 has no MixColumns and adds round key 10 itself.
    aesFinalRound(state, expKey);

    for (int i = 0; i < 4; i++) {
        out[i * 4 + 0] = wbyte(state[i], 0);
        out[i * 4 + 1] = wbyte(state[i], 1);
        out[i * 4 + 2] = wbyte(state[i], 2);
        out[i * 4 + 3] = wbyte(state[i], 3);
    }
}

} // namespace aes128_5b
|
|
@ -9,6 +9,7 @@
|
|||
#include <type_traits>
|
||||
#include "aes_4.cpp"
|
||||
#include "aes_5a.cpp"
|
||||
#include "aes_5b.cpp"
|
||||
|
||||
// AES constants
|
||||
constexpr size_t blockSize = 16;
|
||||
|
@ -71,7 +72,7 @@ void test(void (*aes)(uint8_t *in, uint8_t *out, uint32_t *expKey), uint8_t *in,
|
|||
std::chrono::duration<double, CycleTimeUnit> time = end - start;
|
||||
double timeAVG = time.count() / numTests;
|
||||
|
||||
std::cout << "time :" << time.count()/std::ratio_divide<CycleTimeUnit, TestTimeUnit>::den << "ms\navg time: " << timeAVG << "ns\navg cpu cycles: " << cycles/numTests << std::endl;
|
||||
std::cout << "time: " << time.count()/std::ratio_divide<CycleTimeUnit, TestTimeUnit>::den << "ms\navg time: " << timeAVG << "ns\navg cpu cycles: " << cycles/numTests << std::endl;
|
||||
std::free(tmpBlock);
|
||||
std::free(outBuf);
|
||||
}
|
||||
|
@ -107,112 +108,19 @@ int main() {
|
|||
}
|
||||
opensslCycles = __rdtsc() - opensslCycles;
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double, std::nano> opensslTime = end - start;
|
||||
std::chrono::duration<double, CycleTimeUnit> opensslTime = end - start;
|
||||
double timeAVG = opensslTime.count() / numTests;
|
||||
std::cout << "avg time: " << timeAVG << "ns\navg cycles: " << opensslCycles/numTests << std::endl;
|
||||
std::cout << "time: " << opensslTime.count()/std::ratio_divide<CycleTimeUnit, TestTimeUnit>::den << "ms\navg time: " << timeAVG << "ns\navg cycles: " << opensslCycles/numTests << std::endl;
|
||||
std::free(tmpBlock);
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
expandKey(key, expandedKey);
|
||||
|
||||
test(aes128_4, input, opensslOutput, expandedKey, iv, "My original implementation");
|
||||
test(aes128_5a::aes128_5a, input, opensslOutput, expandedKey, iv, "My original implementation");
|
||||
// test(aes128, input, opensslOutput, expandedKey, iv, "My original implementation");
|
||||
test(aes128_4, input, opensslOutput, expandedKey, iv, "My original implementation (4)");
|
||||
test(aes128_5a::aes128_5, input, opensslOutput, expandedKey, iv, "With macro (5a)");
|
||||
test(aes128_5b::aes128_5, input, opensslOutput, expandedKey, iv, "With T-Box (5b)");
|
||||
|
||||
std::free(input);
|
||||
std::free(opensslOutput);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// int main() {
|
||||
|
||||
// uint8_t key[keySize];
|
||||
|
||||
// uint8_t* opensslOutput = new uint8_t[payloadSize];
|
||||
// uint8_t* myOutput = new uint8_t[payloadSize];
|
||||
// uint8_t* myOptimOutput = new uint8_t[payloadSize];
|
||||
|
||||
// uint8_t* opensslInput = new uint8_t[payloadSize];
|
||||
// uint8_t* myInput = new uint8_t[payloadSize];
|
||||
// uint8_t* myOptimInput = new uint8_t[payloadSize];
|
||||
|
||||
// std::unique_ptr<uint8_t> iv(static_cast<uint8_t*>(std::aligned_alloc(blockSize, blockSize)));
|
||||
// uint32_t expandedKey[44];
|
||||
|
||||
// std::chrono::duration<double, std::nano> opensslTime(0);
|
||||
// std::chrono::duration<double, std::nano> myTime(0);
|
||||
// std::chrono::duration<double, std::nano> myOptimTime(0);
|
||||
|
||||
// uint64_t opensslCycles;
|
||||
// uint64_t myCycles;
|
||||
// uint64_t myOptimCycles;
|
||||
|
||||
// AES_KEY opensslKey;
|
||||
|
||||
// RAND_bytes(key, keySize);
|
||||
// RAND_bytes(opensslInput, payloadSize);
|
||||
|
||||
// RAND_bytes(iv.get(), blockSize);
|
||||
// xor_into_128bit_u(opensslInput, iv.get());
|
||||
// memcpy(myInput, opensslInput, payloadSize);
|
||||
// memcpy(myOptimInput, opensslInput, payloadSize);
|
||||
|
||||
// expandKey(key, expandedKey);
|
||||
// #pragma GCC diagnostic push
|
||||
// #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
// AES_set_encrypt_key(key, 128, &opensslKey);
|
||||
|
||||
// // OPENSSL
|
||||
// auto start = std::chrono::high_resolution_clock::now();
|
||||
// opensslCycles = __rdtsc();
|
||||
// for (int test = 0; test < numTests; ++test) {
|
||||
// AES_encrypt(opensslInput + blockSize * test, opensslOutput + blockSize * test, &opensslKey);
|
||||
// xor_into_128bit_u(opensslInput + blockSize * test, opensslOutput + blockSize * test);
|
||||
// }
|
||||
// #pragma GCC diagnostic pop
|
||||
// opensslCycles = __rdtsc() - opensslCycles;
|
||||
// auto end = std::chrono::high_resolution_clock::now();
|
||||
// opensslTime += end - start;
|
||||
|
||||
// // My 4
|
||||
// start = std::chrono::high_resolution_clock::now();
|
||||
// myCycles = __rdtsc();
|
||||
// for (int test = 0; test < numTests; ++test) {
|
||||
// aes128(myInput + blockSize * test, myOutput + blockSize * test, expandedKey);
|
||||
// xor_into_128bit_u(myInput + blockSize * test, myOutput + blockSize * test);
|
||||
// }
|
||||
// myCycles = __rdtsc() - myCycles;
|
||||
// end = std::chrono::high_resolution_clock::now();
|
||||
// myTime += end - start;
|
||||
|
||||
// // My 5a
|
||||
// start = std::chrono::high_resolution_clock::now();
|
||||
// myOptimCycles = __rdtsc();
|
||||
// for (int test = 0; test < numTests; ++test) {
|
||||
// aes128(myOptimInput + blockSize * test, myOptimOutput + blockSize * test, expandedKey);
|
||||
// xor_into_128bit_u(myOptimInput + blockSize * test, myOptimOutput + blockSize * test);
|
||||
// }
|
||||
// myOptimCycles = __rdtsc() - myOptimCycles;
|
||||
// end = std::chrono::high_resolution_clock::now();
|
||||
// myOptimTime += end - start;
|
||||
|
||||
// // Verify
|
||||
// if (std::memcmp(myOptimOutput, opensslOutput, payloadSize)) {
|
||||
// std::cout << "Output differs\n";
|
||||
// for (int i = 0; i < 16; ++i)
|
||||
// std::cout << (int)myOutput[i] << "!=" << (int)opensslOutput[i] << '\n';
|
||||
// } else {
|
||||
// std::cout << "Output same\n";
|
||||
// }
|
||||
|
||||
// // Print perf stats
|
||||
// double opensslTimeAVG = opensslTime.count() / numTests;
|
||||
// double myTimeAVG = myTime.count() / numTests;
|
||||
// double myOptimTimeAVG = myOptimTime.count() / numTests;
|
||||
|
||||
// std::cout << "avg openssl time: " << opensslTimeAVG << "ns, cycles: " << opensslCycles/numTests << std::endl;
|
||||
// std::cout << "avg my time: " << myTimeAVG << "ns, cycles: " << myCycles/numTests << std::endl;
|
||||
// std::cout << "avg my optim time: " << myOptimTimeAVG << "ns, cycles: " << myOptimCycles/numTests << std::endl;
|
||||
|
||||
// return opensslOutput[0] ^ myOutput[0] ^ myOptimOutput[0];
|
||||
// }
|
Loading…
Reference in New Issue