From 627e55932f48181218d97c3b8c8a336bd1d66141 Mon Sep 17 00:00:00 2001 From: Linus Yang Date: Fri, 18 Aug 2017 14:25:14 +0800 Subject: [PATCH] AES acceleration for x86_64 and arm64 --- CMakeLists.txt | 5 +- lib/aesacc.c | 306 ++++++++++++++++++++++++++++++++++++++++++ lib/aesacc.h | 12 ++ lib/aesarm.c | 115 ++++++++++++++++ lib/aesarm.h | 84 ++++++++++++ lib/aesarm_table.h | 140 +++++++++++++++++++ lib/aesni.c | 327 +++++++++++++++++++++++++++++++++++++++++++++ lib/aesni.h | 119 +++++++++++++++++ makefile | 2 +- 9 files changed, 1107 insertions(+), 3 deletions(-) create mode 100644 lib/aesacc.c create mode 100644 lib/aesacc.h create mode 100644 lib/aesarm.c create mode 100644 lib/aesarm.h create mode 100644 lib/aesarm_table.h create mode 100644 lib/aesni.c create mode 100644 lib/aesni.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7846055..cc8ca2a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,10 +2,11 @@ cmake_minimum_required(VERSION 3.7) project(udp2raw_tunnel) set(CMAKE_CXX_STANDARD 11) -set_source_files_properties(lib/aes.c lib/md5.c PROPERTIES LANGUAGE CXX ) +FILE(GLOB AES_FILES lib/aes*.c) +set_source_files_properties(${AES_FILES} lib/md5.c PROPERTIES LANGUAGE CXX ) set(SOURCE_FILES - lib/aes.c + ${AES_FILES} lib/md5.c common.cpp encrypt.cpp diff --git a/lib/aesacc.c b/lib/aesacc.c new file mode 100644 index 0000000..c845576 --- /dev/null +++ b/lib/aesacc.c @@ -0,0 +1,306 @@ +/* + * This file is adapted from PolarSSL 1.3.19 (GPL) + */ + +#include "aes.h" +#include "aesni.h" +#include "aesarm.h" +#include "aesacc.h" + +#include + +#if defined(AES256) && (AES256 == 1) +#define AES_KEYSIZE 256 +#define aes_setkey_enc aesni_setkey_enc_256 +#elif defined(AES192) && (AES192 == 1) +#define AES_KEYSIZE 192 +#define aes_setkey_enc aesni_setkey_enc_192 +#else +#define AES_KEYSIZE 128 +#define aes_setkey_enc aesni_setkey_enc_128 +#endif + +#define AES_NR ((AES_KEYSIZE >> 5) + 6) +#define AES_RKSIZE 272 + +#ifdef HAVE_AMD64 +#define HAVE_HARDAES 1 +#define aes_supported aesni_supported +#define aes_crypt_ecb aesni_crypt_ecb +#define aes_inverse_key(a,b) aesni_inverse_key(a,b,AES_NR) +#endif /* HAVE_AMD64 */ + +#ifdef HAVE_ARM64 +#define HAVE_HARDAES 1 +#define aes_supported aesarm_supported +#define aes_crypt_ecb aesarm_crypt_ecb + +#include "aesarm_table.h" + +#ifndef GET_UINT32_LE +#define GET_UINT32_LE(n,b,i) \ +{ \ + (n) = ( (uint32_t) (b)[(i) ] ) \ + | ( (uint32_t) (b)[(i) + 1] << 8 ) \ + | ( (uint32_t) (b)[(i) + 2] << 16 ) \ + | ( (uint32_t) (b)[(i) + 3] << 24 ); \ +} +#endif + +static void aes_setkey_enc(uint8_t *rk, const uint8_t *key) +{ + unsigned int i; + uint32_t *RK; + + RK = (uint32_t *) rk; + + for( i = 0; i < ( AES_KEYSIZE >> 5 ); i++ ) + { + GET_UINT32_LE( RK[i], key, i << 2 ); + } + + switch( AES_NR ) + { + case 10: + + for( i = 0; i < 10; i++, RK += 4 ) + { + RK[4] = RK[0] ^ RCON[i] ^ + ( (uint32_t) FSb[ ( RK[3] >> 8 ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( RK[3] >> 16 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( RK[3] >> 24 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( RK[3] ) & 0xFF ] << 24 ); + + RK[5] = RK[1] ^ RK[4]; + RK[6] = RK[2] ^ RK[5]; + RK[7] = RK[3] ^ RK[6]; + } + break; + + case 12: + + for( i = 0; i < 8; i++, RK += 6 ) + { + RK[6] = RK[0] ^ RCON[i] ^ + ( (uint32_t) FSb[ ( RK[5] >> 8 ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( RK[5] >> 16 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( RK[5] >> 24 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( RK[5] ) & 0xFF ] << 24 ); + + RK[7] = RK[1] ^ RK[6]; + RK[8] = RK[2] ^ RK[7]; + RK[9] = RK[3] ^ RK[8]; + RK[10] = RK[4] ^ RK[9]; + RK[11] = RK[5] ^ RK[10]; + } + break; + + case 14: + + for( i = 0; i < 7; i++, RK += 8 ) + { + RK[8] = RK[0] ^ RCON[i] ^ + ( (uint32_t) FSb[ ( RK[7] >> 8 ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( RK[7] >> 16 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( RK[7] >> 24 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( RK[7] ) & 0xFF ] << 24 ); + + RK[9] = RK[1] ^ RK[8]; + RK[10] = RK[2] ^ RK[9]; + RK[11] = RK[3] ^ RK[10]; + + RK[12] = RK[4] ^ + ( (uint32_t) FSb[ ( RK[11] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( RK[11] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( RK[11] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( RK[11] >> 24 ) & 0xFF ] << 24 ); + + RK[13] = RK[5] ^ RK[12]; + RK[14] = RK[6] ^ RK[13]; + RK[15] = RK[7] ^ RK[14]; + } + break; + } +} + +static void aes_inverse_key(uint8_t *invkey, const uint8_t *fwdkey) +{ + int i, j; + uint32_t *RK; + uint32_t *SK; + + RK = (uint32_t *) invkey; + SK = ((uint32_t *) fwdkey) + AES_NR * 4; + + *RK++ = *SK++; + *RK++ = *SK++; + *RK++ = *SK++; + *RK++ = *SK++; + + for( i = AES_NR - 1, SK -= 8; i > 0; i--, SK -= 8 ) + { + for( j = 0; j < 4; j++, SK++ ) + { + *RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^ + RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^ + RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^ + RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ]; + } + } + + *RK++ = *SK++; + *RK++ = *SK++; + *RK++ = *SK++; + *RK++ = *SK++; +} + +#endif /* HAVE_ARM64 */ + +#ifdef HAVE_HARDAES +/* + * AESNI-CBC buffer encryption/decryption + */ +static void aes_crypt_cbc( int mode, + uint8_t* rk, + uint32_t length, + uint8_t iv[16], + const uint8_t *input, + uint8_t *output ) +{ + int i; + uint8_t temp[16]; + + if( mode == AES_DECRYPT ) + { + while( length > 0 ) + { + memcpy( temp, input, 16 ); + aes_crypt_ecb( AES_NR, rk, mode, input, output ); + + for( i = 0; i < 16; i++ ) + output[i] = (uint8_t)( output[i] ^ iv[i] ); + + memcpy( iv, temp, 16 ); + + input += 16; + output += 16; + length -= 16; + } + } + else + { + while( length > 0 ) + { + for( i = 0; i < 16; i++ ) + output[i] = (uint8_t)( input[i] ^ iv[i] ); + + aes_crypt_ecb( AES_NR, rk, mode, output, output ); + memcpy( iv, output, 16 ); + + input += 16; + output += 16; + length -= 16; + } + } +} + +#endif /* HAVE_HARDAES */ + +int AESACC_supported(void) +{ +#if defined(HAVE_HARDAES) + return aes_supported(); +#else + return 0; +#endif +} + +void AESACC_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv) +{ +#if defined(HAVE_HARDAES) + uint8_t iv_tmp[16]; + uint8_t rk[AES_RKSIZE]; + + if (aes_supported()) + { + if (key == NULL || iv == NULL) + { + return; + } + memcpy(iv_tmp, iv, 16); + aes_setkey_enc(rk, key); + aes_crypt_cbc(AES_ENCRYPT, rk, \ + length, iv_tmp, input, output); + return; + } +#endif + + AES_CBC_encrypt_buffer(output, input, length, key, iv); +} + +void AESACC_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv) +{ +#if defined(HAVE_HARDAES) + uint8_t iv_tmp[16]; + uint8_t rk[AES_RKSIZE]; + uint8_t rk_tmp[AES_RKSIZE]; + + if (aes_supported()) + { + if (key == NULL || iv == NULL) + { + return; + } + memcpy(iv_tmp, iv, 16); + aes_setkey_enc(rk_tmp, key); + aes_inverse_key(rk, rk_tmp); + aes_crypt_cbc(AES_DECRYPT, rk, \ + length, iv_tmp, input, output); + return; + } +#endif + + AES_CBC_decrypt_buffer(output, input, length, key, iv); +} + +void AESACC_ECB_encrypt(const uint8_t* input, const uint8_t* key, uint8_t* output, const uint32_t length) +{ +#if defined(HAVE_HARDAES) + uint8_t rk[AES_RKSIZE]; + + if (aes_supported()) + { + if (key == NULL) + { + return; + } + aes_setkey_enc(rk, key); + aes_crypt_ecb(AES_NR, rk, AES_ENCRYPT, input, output); + return; + } +#endif + + AES_ECB_encrypt(input, key, output, length); +} + +void AESACC_ECB_decrypt(const uint8_t* input, const uint8_t* key, uint8_t *output, const uint32_t length) +{ +#if defined(HAVE_HARDAES) + uint8_t rk[AES_RKSIZE]; + uint8_t rk_tmp[AES_RKSIZE]; + + if (aes_supported()) + { + if (key == NULL) + { + return; + } + aes_setkey_enc(rk_tmp, key); + aes_inverse_key(rk, rk_tmp); + aes_crypt_ecb(AES_NR, rk, AES_DECRYPT, input, output); + return; + } +#endif + + AES_ECB_decrypt(input, key, output, length); +} diff --git a/lib/aesacc.h b/lib/aesacc.h new file mode 100644 index 0000000..61012e6 --- /dev/null +++ b/lib/aesacc.h @@ -0,0 +1,12 @@ +#ifndef _AESACC_H_ +#define _AESACC_H_ + +#include + +int AESACC_supported(void); +void AESACC_ECB_encrypt(const uint8_t* input, const uint8_t* key, uint8_t *output, const uint32_t length); +void AESACC_ECB_decrypt(const uint8_t* input, const uint8_t* key, uint8_t *output, const uint32_t length); +void AESACC_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv); +void AESACC_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv); + +#endif /* _AESACC_H_ */ diff --git a/lib/aesarm.c b/lib/aesarm.c new file mode 100644 index 0000000..2a76d3d --- /dev/null +++ b/lib/aesarm.c @@ -0,0 +1,115 @@ +/* + * This file is adapted from https://github.com/CriticalBlue/mbedtls + */ + +/* + * ARMv8-A Cryptography Extension AES support functions + * + * Copyright (C) 2016, CriticalBlue Limited, All Rights Reserved + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of mbed TLS (https://tls.mbed.org) + */ + +#include "aesarm.h" + +#if defined(HAVE_ARM64) + +#include +#include +#include + +/* + * ARMv8a Crypto Extension support detection routine + */ +int aesarm_supported( void ) +{ + static int done = 0; + static unsigned int c = 0; + + if ( ! done ) + { + c = getauxval(AT_HWCAP); + done = 1; + } + + return ( c & HWCAP_AES ) != 0; +} + +/* + * ARMv8a AES-ECB block en(de)cryption + */ +void aesarm_crypt_ecb( int nr, + unsigned char *rk, + int mode, + const unsigned char input[16], + unsigned char output[16] ) +{ + int i; + uint8x16_t state_vec, roundkey_vec; + uint8_t *RK = (uint8_t *) rk; + + // Load input and round key into into their vectors + state_vec = vld1q_u8( input ); + + if ( mode == AES_ENCRYPT ) + { + // Initial AddRoundKey is in the loop due to AES instruction always doing AddRoundKey first + for( i = 0; i < nr - 1; i++ ) + { + // Load Round Key + roundkey_vec = vld1q_u8( RK ); + // Forward (AESE) round (AddRoundKey, SubBytes and ShiftRows) + state_vec = vaeseq_u8( state_vec, roundkey_vec ); + // Mix Columns (AESMC) + state_vec = vaesmcq_u8( state_vec ); + // Move pointer ready to load next round key + RK += 16; + } + + // Final Forward (AESE) round (AddRoundKey, SubBytes and ShiftRows). No Mix columns + roundkey_vec = vld1q_u8( RK ); /* RK already moved in loop */ + state_vec = vaeseq_u8( state_vec, roundkey_vec ); + } + else + { + // Initial AddRoundKey is in the loop due to AES instruction always doing AddRoundKey first + for( i = 0; i < nr - 1; i++ ) + { + // Load Round Key + roundkey_vec = vld1q_u8( RK ); + // Reverse (AESD) round (AddRoundKey, SubBytes and ShiftRows) + state_vec = vaesdq_u8( state_vec, roundkey_vec ); + // Inverse Mix Columns (AESIMC) + state_vec = vaesimcq_u8( state_vec ); + // Move pointer ready to load next round key + RK += 16; + } + + // Final Reverse (AESD) round (AddRoundKey, SubBytes and ShiftRows). No Mix columns + roundkey_vec = vld1q_u8( RK ); /* RK already moved in loop */ + state_vec = vaesdq_u8( state_vec, roundkey_vec ); + } + + // Manually apply final Add RoundKey step (EOR) + RK += 16; + roundkey_vec = vld1q_u8( RK ); + state_vec = veorq_u8( state_vec, roundkey_vec ); + + // Write results back to output array + vst1q_u8( output, state_vec ); +} + +#endif /* HAVE_ARM64 */ diff --git a/lib/aesarm.h b/lib/aesarm.h new file mode 100644 index 0000000..2178e42 --- /dev/null +++ b/lib/aesarm.h @@ -0,0 +1,84 @@ +/* + * This file is adapted from https://github.com/CriticalBlue/mbedtls + */ + +/** + * \file aes_armv8a_ce.h + * + * \brief AES support functions using the ARMv8-A Cryptography Extension for + * hardware acceleration on some ARM processors. + * + * Copyright (C) 2016, CriticalBlue Limited, All Rights Reserved + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of mbed TLS (https://tls.mbed.org) + */ + +#ifndef _AESARM_H_ +#define _AESARM_H_ + +#ifndef AES_ENCRYPT +#define AES_ENCRYPT 1 +#endif + +#ifndef AES_DECRYPT +#define AES_DECRYPT 0 +#endif + +#if defined(__GNUC__) && \ + __ARM_ARCH >= 8 && \ + __ARM_ARCH_PROFILE == 'A' && \ + defined(__aarch64__) && \ + defined(__ARM_FEATURE_CRYPTO) && \ + defined(__linux__) && \ + !defined(NO_AESACC) +#define HAVE_ARM64 +#endif + +#if defined(HAVE_ARM64) + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief ARMv8-A features detection routine + * + * \return 1 if the CPU has support for the feature, 0 otherwise + */ +int aesarm_supported( void ); + +/** + * \brief AES ARMv8-A Cryptography Extension AES-ECB block en(de)cryption + * + * \param nr number of rounds + * \param rk AES round keys + * \param mode AESARM_ENCRYPT or AESARM_DECRYPT + * \param input 16-byte input block + * \param output 16-byte output block + */ +void aesarm_crypt_ecb( int nr, + unsigned char *rk, + int mode, + const unsigned char input[16], + unsigned char output[16] ); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_ARM64 */ + +#endif /* _AESARM_H_ */ diff --git a/lib/aesarm_table.h b/lib/aesarm_table.h new file mode 100644 index 0000000..fc63cdb --- /dev/null +++ b/lib/aesarm_table.h @@ -0,0 +1,140 @@ +/* + * This file is adapted from PolarSSL 1.3.19 (GPL) + */ + +/* + * Forward S-box + */ +static const unsigned char FSb[256] = +{ + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, + 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, + 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, + 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, + 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, + 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, + 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, + 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, + 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, + 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, + 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, + 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, + 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, + 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, + 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, + 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, + 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 +}; + +/* + * Round constants + */ +static const uint32_t RCON[10] = +{ + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080, + 0x0000001B, 0x00000036 +}; + +/* + * Reverse tables + */ +#define RT \ +\ + V(50,A7,F4,51), V(53,65,41,7E), V(C3,A4,17,1A), V(96,5E,27,3A), \ + V(CB,6B,AB,3B), V(F1,45,9D,1F), V(AB,58,FA,AC), V(93,03,E3,4B), \ + V(55,FA,30,20), V(F6,6D,76,AD), V(91,76,CC,88), V(25,4C,02,F5), \ + V(FC,D7,E5,4F), V(D7,CB,2A,C5), V(80,44,35,26), V(8F,A3,62,B5), \ + V(49,5A,B1,DE), V(67,1B,BA,25), V(98,0E,EA,45), V(E1,C0,FE,5D), \ + V(02,75,2F,C3), V(12,F0,4C,81), V(A3,97,46,8D), V(C6,F9,D3,6B), \ + V(E7,5F,8F,03), V(95,9C,92,15), V(EB,7A,6D,BF), V(DA,59,52,95), \ + V(2D,83,BE,D4), V(D3,21,74,58), V(29,69,E0,49), V(44,C8,C9,8E), \ + V(6A,89,C2,75), V(78,79,8E,F4), V(6B,3E,58,99), V(DD,71,B9,27), \ + V(B6,4F,E1,BE), V(17,AD,88,F0), V(66,AC,20,C9), V(B4,3A,CE,7D), \ + V(18,4A,DF,63), V(82,31,1A,E5), V(60,33,51,97), V(45,7F,53,62), \ + V(E0,77,64,B1), V(84,AE,6B,BB), V(1C,A0,81,FE), V(94,2B,08,F9), \ + V(58,68,48,70), V(19,FD,45,8F), V(87,6C,DE,94), V(B7,F8,7B,52), \ + V(23,D3,73,AB), V(E2,02,4B,72), V(57,8F,1F,E3), V(2A,AB,55,66), \ + V(07,28,EB,B2), V(03,C2,B5,2F), V(9A,7B,C5,86), V(A5,08,37,D3), \ + V(F2,87,28,30), V(B2,A5,BF,23), V(BA,6A,03,02), V(5C,82,16,ED), \ + V(2B,1C,CF,8A), V(92,B4,79,A7), V(F0,F2,07,F3), V(A1,E2,69,4E), \ + V(CD,F4,DA,65), V(D5,BE,05,06), V(1F,62,34,D1), V(8A,FE,A6,C4), \ + V(9D,53,2E,34), V(A0,55,F3,A2), V(32,E1,8A,05), V(75,EB,F6,A4), \ + V(39,EC,83,0B), V(AA,EF,60,40), V(06,9F,71,5E), V(51,10,6E,BD), \ + V(F9,8A,21,3E), V(3D,06,DD,96), V(AE,05,3E,DD), V(46,BD,E6,4D), \ + V(B5,8D,54,91), V(05,5D,C4,71), V(6F,D4,06,04), V(FF,15,50,60), \ + V(24,FB,98,19), V(97,E9,BD,D6), V(CC,43,40,89), V(77,9E,D9,67), \ + V(BD,42,E8,B0), V(88,8B,89,07), V(38,5B,19,E7), V(DB,EE,C8,79), \ + V(47,0A,7C,A1), V(E9,0F,42,7C), V(C9,1E,84,F8), V(00,00,00,00), \ + V(83,86,80,09), V(48,ED,2B,32), V(AC,70,11,1E), V(4E,72,5A,6C), \ + V(FB,FF,0E,FD), V(56,38,85,0F), V(1E,D5,AE,3D), V(27,39,2D,36), \ + V(64,D9,0F,0A), V(21,A6,5C,68), V(D1,54,5B,9B), V(3A,2E,36,24), \ + V(B1,67,0A,0C), V(0F,E7,57,93), V(D2,96,EE,B4), V(9E,91,9B,1B), \ + V(4F,C5,C0,80), V(A2,20,DC,61), V(69,4B,77,5A), V(16,1A,12,1C), \ + V(0A,BA,93,E2), V(E5,2A,A0,C0), V(43,E0,22,3C), V(1D,17,1B,12), \ + V(0B,0D,09,0E), V(AD,C7,8B,F2), V(B9,A8,B6,2D), V(C8,A9,1E,14), \ + V(85,19,F1,57), V(4C,07,75,AF), V(BB,DD,99,EE), V(FD,60,7F,A3), \ + V(9F,26,01,F7), V(BC,F5,72,5C), V(C5,3B,66,44), V(34,7E,FB,5B), \ + V(76,29,43,8B), V(DC,C6,23,CB), V(68,FC,ED,B6), V(63,F1,E4,B8), \ + V(CA,DC,31,D7), V(10,85,63,42), V(40,22,97,13), V(20,11,C6,84), \ + V(7D,24,4A,85), V(F8,3D,BB,D2), V(11,32,F9,AE), V(6D,A1,29,C7), \ + V(4B,2F,9E,1D), V(F3,30,B2,DC), V(EC,52,86,0D), V(D0,E3,C1,77), \ + V(6C,16,B3,2B), V(99,B9,70,A9), V(FA,48,94,11), V(22,64,E9,47), \ + V(C4,8C,FC,A8), V(1A,3F,F0,A0), V(D8,2C,7D,56), V(EF,90,33,22), \ + V(C7,4E,49,87), V(C1,D1,38,D9), V(FE,A2,CA,8C), V(36,0B,D4,98), \ + V(CF,81,F5,A6), V(28,DE,7A,A5), V(26,8E,B7,DA), V(A4,BF,AD,3F), \ + V(E4,9D,3A,2C), V(0D,92,78,50), V(9B,CC,5F,6A), V(62,46,7E,54), \ + V(C2,13,8D,F6), V(E8,B8,D8,90), V(5E,F7,39,2E), V(F5,AF,C3,82), \ + V(BE,80,5D,9F), V(7C,93,D0,69), V(A9,2D,D5,6F), V(B3,12,25,CF), \ + V(3B,99,AC,C8), V(A7,7D,18,10), V(6E,63,9C,E8), V(7B,BB,3B,DB), \ + V(09,78,26,CD), V(F4,18,59,6E), V(01,B7,9A,EC), V(A8,9A,4F,83), \ + V(65,6E,95,E6), V(7E,E6,FF,AA), V(08,CF,BC,21), V(E6,E8,15,EF), \ + V(D9,9B,E7,BA), V(CE,36,6F,4A), V(D4,09,9F,EA), V(D6,7C,B0,29), \ + V(AF,B2,A4,31), V(31,23,3F,2A), V(30,94,A5,C6), V(C0,66,A2,35), \ + V(37,BC,4E,74), V(A6,CA,82,FC), V(B0,D0,90,E0), V(15,D8,A7,33), \ + V(4A,98,04,F1), V(F7,DA,EC,41), V(0E,50,CD,7F), V(2F,F6,91,17), \ + V(8D,D6,4D,76), V(4D,B0,EF,43), V(54,4D,AA,CC), V(DF,04,96,E4), \ + V(E3,B5,D1,9E), V(1B,88,6A,4C), V(B8,1F,2C,C1), V(7F,51,65,46), \ + V(04,EA,5E,9D), V(5D,35,8C,01), V(73,74,87,FA), V(2E,41,0B,FB), \ + V(5A,1D,67,B3), V(52,D2,DB,92), V(33,56,10,E9), V(13,47,D6,6D), \ + V(8C,61,D7,9A), V(7A,0C,A1,37), V(8E,14,F8,59), V(89,3C,13,EB), \ + V(EE,27,A9,CE), V(35,C9,61,B7), V(ED,E5,1C,E1), V(3C,B1,47,7A), \ + V(59,DF,D2,9C), V(3F,73,F2,55), V(79,CE,14,18), V(BF,37,C7,73), \ + V(EA,CD,F7,53), V(5B,AA,FD,5F), V(14,6F,3D,DF), V(86,DB,44,78), \ + V(81,F3,AF,CA), V(3E,C4,68,B9), V(2C,34,24,38), V(5F,40,A3,C2), \ + V(72,C3,1D,16), V(0C,25,E2,BC), V(8B,49,3C,28), V(41,95,0D,FF), \ + V(71,01,A8,39), V(DE,B3,0C,08), V(9C,E4,B4,D8), V(90,C1,56,64), \ + V(61,84,CB,7B), V(70,B6,32,D5), V(74,5C,6C,48), V(42,57,B8,D0) + +#define V(a,b,c,d) 0x##a##b##c##d +static const uint32_t RT0[256] = { RT }; +#undef V + +#define V(a,b,c,d) 0x##b##c##d##a +static const uint32_t RT1[256] = { RT }; +#undef V + +#define V(a,b,c,d) 0x##c##d##a##b +static const uint32_t RT2[256] = { RT }; +#undef V + +#define V(a,b,c,d) 0x##d##a##b##c +static const uint32_t RT3[256] = { RT }; +#undef V + +#undef RT diff --git a/lib/aesni.c b/lib/aesni.c new file mode 100644 index 0000000..66e6cb8 --- /dev/null +++ b/lib/aesni.c @@ -0,0 +1,327 @@ +/* + * This file is adapted from PolarSSL 1.3.19 (GPL) + */ + +/* + * AES-NI support functions + * + * Copyright (C) 2006-2014, ARM Limited, All Rights Reserved + * + * This file is part of mbed TLS (https://tls.mbed.org) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set + * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/ + */ + +#include +#include "aesni.h" + +#if defined(HAVE_AMD64) + +/* + * AES-NI support detection routine + */ +#define AESNI_AES 0x02000000u + +int aesni_supported( void ) +{ + static int done = 0; + static unsigned int c = 0; + + if( ! done ) + { + asm( "movl $1, %%eax \n\t" + "cpuid \n\t" + : "=c" (c) + : + : "eax", "ebx", "edx" ); + done = 1; + } + + return( ( c & AESNI_AES ) != 0 ); +} + +/* + * Binutils needs to be at least 2.19 to support AES-NI instructions. + * Unfortunately, a lot of users have a lower version now (2014-04). + * Emit bytecode directly in order to support "old" version of gas. + * + * Opcodes from the Intel architecture reference manual, vol. 3. + * We always use registers, so we don't need prefixes for memory operands. + * Operand macros are in gas order (src, dst) as opposed to Intel order + * (dst, src) in order to blend better into the surrounding assembly code. + */ +#define AESDEC ".byte 0x66,0x0F,0x38,0xDE," +#define AESDECLAST ".byte 0x66,0x0F,0x38,0xDF," +#define AESENC ".byte 0x66,0x0F,0x38,0xDC," +#define AESENCLAST ".byte 0x66,0x0F,0x38,0xDD," +#define AESIMC ".byte 0x66,0x0F,0x38,0xDB," +#define AESKEYGENA ".byte 0x66,0x0F,0x3A,0xDF," +#define PCLMULQDQ ".byte 0x66,0x0F,0x3A,0x44," + +#define xmm0_xmm0 "0xC0" +#define xmm0_xmm1 "0xC8" +#define xmm0_xmm2 "0xD0" +#define xmm0_xmm3 "0xD8" +#define xmm0_xmm4 "0xE0" +#define xmm1_xmm0 "0xC1" +#define xmm1_xmm2 "0xD1" + +/* + * AES-NI AES-ECB block en(de)cryption + */ +int aesni_crypt_ecb( int nr, + unsigned char *rk, + int mode, + const unsigned char input[16], + unsigned char output[16] ) +{ + asm( "movdqu (%3), %%xmm0 \n\t" // load input + "movdqu (%1), %%xmm1 \n\t" // load round key 0 + "pxor %%xmm1, %%xmm0 \n\t" // round 0 + "addq $16, %1 \n\t" // point to next round key + "subl $1, %0 \n\t" // normal rounds = nr - 1 + "test %2, %2 \n\t" // mode? + "jz 2f \n\t" // 0 = decrypt + + "1: \n\t" // encryption loop + "movdqu (%1), %%xmm1 \n\t" // load round key + AESENC xmm1_xmm0 "\n\t" // do round + "addq $16, %1 \n\t" // point to next round key + "subl $1, %0 \n\t" // loop + "jnz 1b \n\t" + "movdqu (%1), %%xmm1 \n\t" // load round key + AESENCLAST xmm1_xmm0 "\n\t" // last round + "jmp 3f \n\t" + + "2: \n\t" // decryption loop + "movdqu (%1), %%xmm1 \n\t" + AESDEC xmm1_xmm0 "\n\t" // do round + "addq $16, %1 \n\t" + "subl $1, %0 \n\t" + "jnz 2b \n\t" + "movdqu (%1), %%xmm1 \n\t" // load round key + AESDECLAST xmm1_xmm0 "\n\t" // last round + + "3: \n\t" + "movdqu %%xmm0, (%4) \n\t" // export output + : + : "r" (nr), "r" (rk), "r" (mode), "r" (input), "r" (output) + : "memory", "cc", "xmm0", "xmm1" ); + + + return( 0 ); +} + +/* + * Compute decryption round keys from encryption round keys + */ +void aesni_inverse_key( unsigned char *invkey, + const unsigned char *fwdkey, int nr ) +{ + unsigned char *ik = invkey; + const unsigned char *fk = fwdkey + 16 * nr; + + memcpy( ik, fk, 16 ); + + for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 ) + asm( "movdqu (%0), %%xmm0 \n\t" + AESIMC xmm0_xmm0 "\n\t" + "movdqu %%xmm0, (%1) \n\t" + : + : "r" (fk), "r" (ik) + : "memory", "xmm0" ); + + memcpy( ik, fk, 16 ); +} + +/* + * Key expansion, 128-bit case + */ +void aesni_setkey_enc_128( unsigned char *rk, + const unsigned char *key ) +{ + asm( "movdqu (%1), %%xmm0 \n\t" // copy the original key + "movdqu %%xmm0, (%0) \n\t" // as round key 0 + "jmp 2f \n\t" // skip auxiliary routine + + /* + * Finish generating the next round key. + * + * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff + * with X = rot( sub( r3 ) ) ^ RCON. + * + * On exit, xmm0 is r7:r6:r5:r4 + * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3 + * and those are written to the round key buffer. + */ + "1: \n\t" + "pshufd $0xff, %%xmm1, %%xmm1 \n\t" // X:X:X:X + "pxor %%xmm0, %%xmm1 \n\t" // X+r3:X+r2:X+r1:r4 + "pslldq $4, %%xmm0 \n\t" // r2:r1:r0:0 + "pxor %%xmm0, %%xmm1 \n\t" // X+r3+r2:X+r2+r1:r5:r4 + "pslldq $4, %%xmm0 \n\t" // etc + "pxor %%xmm0, %%xmm1 \n\t" + "pslldq $4, %%xmm0 \n\t" + "pxor %%xmm1, %%xmm0 \n\t" // update xmm0 for next time! + "add $16, %0 \n\t" // point to next round key + "movdqu %%xmm0, (%0) \n\t" // write it + "ret \n\t" + + /* Main "loop" */ + "2: \n\t" + AESKEYGENA xmm0_xmm1 ",0x01 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x02 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x04 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x08 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x10 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x20 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x40 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x80 \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x1B \n\tcall 1b \n\t" + AESKEYGENA xmm0_xmm1 ",0x36 \n\tcall 1b \n\t" + : + : "r" (rk), "r" (key) + : "memory", "cc", "0" ); +} + +/* + * Key expansion, 192-bit case + */ +void aesni_setkey_enc_192( unsigned char *rk, + const unsigned char *key ) +{ + asm( "movdqu (%1), %%xmm0 \n\t" // copy original round key + "movdqu %%xmm0, (%0) \n\t" + "add $16, %0 \n\t" + "movq 16(%1), %%xmm1 \n\t" + "movq %%xmm1, (%0) \n\t" + "add $8, %0 \n\t" + "jmp 2f \n\t" // skip auxiliary routine + + /* + * Finish generating the next 6 quarter-keys. + * + * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4 + * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON. + * + * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10 + * and those are written to the round key buffer. + */ + "1: \n\t" + "pshufd $0x55, %%xmm2, %%xmm2 \n\t" // X:X:X:X + "pxor %%xmm0, %%xmm2 \n\t" // X+r3:X+r2:X+r1:r4 + "pslldq $4, %%xmm0 \n\t" // etc + "pxor %%xmm0, %%xmm2 \n\t" + "pslldq $4, %%xmm0 \n\t" + "pxor %%xmm0, %%xmm2 \n\t" + "pslldq $4, %%xmm0 \n\t" + "pxor %%xmm2, %%xmm0 \n\t" // update xmm0 = r9:r8:r7:r6 + "movdqu %%xmm0, (%0) \n\t" + "add $16, %0 \n\t" + "pshufd $0xff, %%xmm0, %%xmm2 \n\t" // r9:r9:r9:r9 + "pxor %%xmm1, %%xmm2 \n\t" // stuff:stuff:r9+r5:r10 + "pslldq $4, %%xmm1 \n\t" // r2:r1:r0:0 + "pxor %%xmm2, %%xmm1 \n\t" // xmm1 = stuff:stuff:r11:r10 + "movq %%xmm1, (%0) \n\t" + "add $8, %0 \n\t" + "ret \n\t" + + "2: \n\t" + AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x80 \n\tcall 1b \n\t" + + : + : "r" (rk), "r" (key) + : "memory", "cc", "0" ); +} + +/* + * Key expansion, 256-bit case + */ +void aesni_setkey_enc_256( unsigned char *rk, + const unsigned char *key ) +{ + asm( "movdqu (%1), %%xmm0 \n\t" + "movdqu %%xmm0, (%0) \n\t" + "add $16, %0 \n\t" + "movdqu 16(%1), %%xmm1 \n\t" + "movdqu %%xmm1, (%0) \n\t" + "jmp 2f \n\t" // skip auxiliary routine + + /* + * Finish generating the next two round keys. + * + * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and + * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON + * + * On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12 + * and those have been written to the output buffer. + */ + "1: \n\t" + "pshufd $0xff, %%xmm2, %%xmm2 \n\t" + "pxor %%xmm0, %%xmm2 \n\t" + "pslldq $4, %%xmm0 \n\t" + "pxor %%xmm0, %%xmm2 \n\t" + "pslldq $4, %%xmm0 \n\t" + "pxor %%xmm0, %%xmm2 \n\t" + "pslldq $4, %%xmm0 \n\t" + "pxor %%xmm2, %%xmm0 \n\t" + "add $16, %0 \n\t" + "movdqu %%xmm0, (%0) \n\t" + + /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 ) + * and proceed to generate next round key from there */ + AESKEYGENA xmm0_xmm2 ",0x00 \n\t" + "pshufd $0xaa, %%xmm2, %%xmm2 \n\t" + "pxor %%xmm1, %%xmm2 \n\t" + "pslldq $4, %%xmm1 \n\t" + "pxor %%xmm1, %%xmm2 \n\t" + "pslldq $4, %%xmm1 \n\t" + "pxor %%xmm1, %%xmm2 \n\t" + "pslldq $4, %%xmm1 \n\t" + "pxor %%xmm2, %%xmm1 \n\t" + "add $16, %0 \n\t" + "movdqu %%xmm1, (%0) \n\t" + "ret \n\t" + + /* + * Main "loop" - Generating one more key than necessary, + * see definition of aes_context.buf + */ + "2: \n\t" + AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t" + AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t" + : + : "r" (rk), "r" (key) + : "memory", "cc", "0" ); +} + +#endif /* HAVE_AMD64 */ diff --git a/lib/aesni.h b/lib/aesni.h new file mode 100644 index 0000000..42b6aa1 --- /dev/null +++ b/lib/aesni.h @@ -0,0 +1,119 @@ +/* + * This file is adapted from PolarSSL 1.3.19 (GPL) + */ + +/** + * \file aesni.h + * + * \brief AES-NI for hardware AES acceleration on some Intel processors + * + * Copyright (C) 2013, ARM Limited, All Rights Reserved + * + * This file is part of mbed TLS (https://tls.mbed.org) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _AESNI_H_ +#define _AESNI_H_ + +#ifndef AES_ENCRYPT +#define AES_ENCRYPT 1 +#endif + +#ifndef AES_DECRYPT +#define AES_DECRYPT 0 +#endif + +#if defined(__GNUC__) && \ + ( defined(__amd64__) || defined(__x86_64__) ) && \ + !defined(NO_AESACC) +#define HAVE_AMD64 +#endif + +#if defined(HAVE_AMD64) + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief AES-NI features detection routine + * + * \return 1 if CPU has support for AES-NI, 0 otherwise + */ +int aesni_supported( void ); + +/** + * \brief AES-NI AES-ECB block en(de)cryption + * + * \param nr number of rounds + * \param rk AES round keys + * \param mode AES_ENCRYPT or AES_DECRYPT + * \param input 16-byte input block + * \param output 16-byte output block + * + * \return 0 on success (cannot fail) + */ +int aesni_crypt_ecb( int nr, + unsigned char *rk, + int mode, + const unsigned char input[16], + unsigned char output[16] ); + +/** + * \brief Compute decryption round keys from encryption round keys + * + * \param invkey Round keys for the equivalent inverse cipher + * \param fwdkey Original round keys (for encryption) + * \param nr Number of rounds (that is, number of round keys minus one) + */ +void aesni_inverse_key( unsigned char *invkey, + const unsigned char *fwdkey, int nr ); + +/** + * \brief Perform 128-bit key expansion (for encryption) + * + * \param rk Destination buffer where the round keys are written + * \param key Encryption key + */ +void aesni_setkey_enc_128( unsigned char *rk, + const unsigned char *key ); + +/** + * \brief Perform 192-bit key expansion (for encryption) + * + * \param rk Destination buffer where the round keys are written + * \param key Encryption key + */ +void aesni_setkey_enc_192( unsigned char *rk, + const unsigned char *key ); + +/** + * \brief Perform 256-bit key expansion (for encryption) + * + * \param rk Destination buffer where the round keys are written + * \param key Encryption key + */ +void aesni_setkey_enc_256( unsigned char *rk, + const unsigned char *key ); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_AMD64 */ + +#endif /* _AESNI_H_ */ diff --git a/makefile b/makefile index 118e711..35d19a9 100755 --- a/makefile +++ b/makefile @@ -4,7 +4,7 @@ cc_ar71xx=/home/wangyu/OpenWrt-SDK-ar71xx-for-linux-x86_64-gcc-4.8-linaro_uClibc cc_bcm2708=/home/wangyu/raspberry/tools/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian/bin/arm-linux-gnueabihf-g++ cc_arm=/home/wangyu/Desktop/arm-2014.05/bin/arm-none-linux-gnueabi-g++ FLAGS= -std=c++11 -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter -Wno-missing-field-initializers -SOURCES=main.cpp lib/aes.c lib/md5.c encrypt.cpp log.cpp network.cpp common.cpp +SOURCES=main.cpp $(wildcard lib/aes*.c) lib/md5.c encrypt.cpp log.cpp network.cpp common.cpp NAME=udp2raw TAR=${NAME}_binaries.tar.gz ${NAME}_amd64 ${NAME}_x86 ${NAME}_ar71xx ${NAME}_bcm2708 ${NAME}_arm