DRM decrypting tool for Samsung TVs PVR
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

AESNI.c 9.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. /*
  2. * AESNI.c: AES using AES-NI instructions
  3. *
  4. * Written in 2013 by Sebastian Ramacher <sebastian@ramacher.at>
  5. *
  6. * ===================================================================
  7. * The contents of this file are dedicated to the public domain. To
  8. * the extent that dedication to the public domain is not available,
  9. * everyone is granted a worldwide, perpetual, royalty-free,
  10. * non-exclusive license to exercise all rights associated with the
  11. * contents of this file for any purpose whatsoever.
  12. * No rights are reserved.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  18. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. * SOFTWARE.
  22. * ===================================================================
  23. */
  24. #include <wmmintrin.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <errno.h>
  28. #if defined(HAVE__ALIGNED_MALLOC)
  29. #include <malloc.h>
  30. #endif
  31. #define MODULE_NAME _AESNI
  32. #define BLOCK_SIZE 16
  33. #define KEY_SIZE 0
  34. #define MAXKC (256/32)
  35. #define MAXKB (256/8)
  36. #define MAXNR 14
  37. typedef unsigned char u8;
  38. typedef struct {
  39. __m128i* ek;
  40. __m128i* dk;
  41. int rounds;
  42. } block_state;
  43. /* Wrapper functions for malloc and free with memory alignment */
  44. #if defined(HAVE_ALIGNED_ALLOC) /* aligned_alloc is defined by C11 */
  45. # define aligned_malloc_wrapper aligned_alloc
  46. # define aligned_free_wrapper free
  47. #elif defined(HAVE_POSIX_MEMALIGN) /* posix_memalign is defined by POSIX */
  48. static void* aligned_malloc_wrapper(size_t alignment, size_t size)
  49. {
  50. void* tmp = NULL;
  51. int err = posix_memalign(&tmp, alignment, size);
  52. if (err != 0) {
  53. /* posix_memalign does NOT set errno on failure; the error is returned */
  54. errno = err;
  55. return NULL;
  56. }
  57. return tmp;
  58. }
  59. # define aligned_free_wrapper free
  60. #elif defined(HAVE__ALIGNED_MALLOC) /* _aligned_malloc is available on Windows */
  61. static void* aligned_malloc_wrapper(size_t alignment, size_t size)
  62. {
  63. /* NB: _aligned_malloc takes its args in the opposite order from aligned_alloc */
  64. return _aligned_malloc(size, alignment);
  65. }
  66. # define aligned_free_wrapper _aligned_free
  67. #else
  68. # error "No function to allocate/free aligned memory is available."
  69. #endif
  70. /* Helper functions to expand keys */
  71. static __m128i aes128_keyexpand(__m128i key)
  72. {
  73. key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
  74. key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
  75. return _mm_xor_si128(key, _mm_slli_si128(key, 4));
  76. }
  77. static __m128i aes192_keyexpand_2(__m128i key, __m128i key2)
  78. {
  79. key = _mm_shuffle_epi32(key, 0xff);
  80. key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));
  81. return _mm_xor_si128(key, key2);
  82. }
  83. #define KEYEXP128_H(K1, K2, I, S) _mm_xor_si128(aes128_keyexpand(K1), \
  84. _mm_shuffle_epi32(_mm_aeskeygenassist_si128(K2, I), S))
  85. #define KEYEXP128(K, I) KEYEXP128_H(K, K, I, 0xff)
  86. #define KEYEXP192(K1, K2, I) KEYEXP128_H(K1, K2, I, 0x55)
  87. #define KEYEXP192_2(K1, K2) aes192_keyexpand_2(K1, K2)
  88. #define KEYEXP256(K1, K2, I) KEYEXP128_H(K1, K2, I, 0xff)
  89. #define KEYEXP256_2(K1, K2) KEYEXP128_H(K1, K2, 0x00, 0xaa)
  90. /* Encryption key setup */
  91. static void aes_key_setup_enc(__m128i rk[], const u8* cipherKey, int keylen)
  92. {
  93. switch (keylen) {
  94. case 16:
  95. {
  96. /* 128 bit key setup */
  97. rk[0] = _mm_loadu_si128((const __m128i*) cipherKey);
  98. rk[1] = KEYEXP128(rk[0], 0x01);
  99. rk[2] = KEYEXP128(rk[1], 0x02);
  100. rk[3] = KEYEXP128(rk[2], 0x04);
  101. rk[4] = KEYEXP128(rk[3], 0x08);
  102. rk[5] = KEYEXP128(rk[4], 0x10);
  103. rk[6] = KEYEXP128(rk[5], 0x20);
  104. rk[7] = KEYEXP128(rk[6], 0x40);
  105. rk[8] = KEYEXP128(rk[7], 0x80);
  106. rk[9] = KEYEXP128(rk[8], 0x1B);
  107. rk[10] = KEYEXP128(rk[9], 0x36);
  108. break;
  109. }
  110. case 24:
  111. {
  112. /* 192 bit key setup */
  113. __m128i temp[2];
  114. rk[0] = _mm_loadu_si128((const __m128i*) cipherKey);
  115. rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16));
  116. temp[0] = KEYEXP192(rk[0], rk[1], 0x01);
  117. temp[1] = KEYEXP192_2(temp[0], rk[1]);
  118. rk[1] = (__m128i)_mm_shuffle_pd((__m128d)rk[1], (__m128d)temp[0], 0);
  119. rk[2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
  120. rk[3] = KEYEXP192(temp[0], temp[1], 0x02);
  121. rk[4] = KEYEXP192_2(rk[3], temp[1]);
  122. temp[0] = KEYEXP192(rk[3], rk[4], 0x04);
  123. temp[1] = KEYEXP192_2(temp[0], rk[4]);
  124. rk[4] = (__m128i)_mm_shuffle_pd((__m128d)rk[4], (__m128d)temp[0], 0);
  125. rk[5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
  126. rk[6] = KEYEXP192(temp[0], temp[1], 0x08);
  127. rk[7] = KEYEXP192_2(rk[6], temp[1]);
  128. temp[0] = KEYEXP192(rk[6], rk[7], 0x10);
  129. temp[1] = KEYEXP192_2(temp[0], rk[7]);
  130. rk[7] = (__m128i)_mm_shuffle_pd((__m128d)rk[7], (__m128d)temp[0], 0);
  131. rk[8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
  132. rk[9] = KEYEXP192(temp[0], temp[1], 0x20);
  133. rk[10] = KEYEXP192_2(rk[9], temp[1]);
  134. temp[0] = KEYEXP192(rk[9], rk[10], 0x40);
  135. temp[1] = KEYEXP192_2(temp[0], rk[10]);
  136. rk[10] = (__m128i)_mm_shuffle_pd((__m128d)rk[10], (__m128d) temp[0], 0);
  137. rk[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1);
  138. rk[12] = KEYEXP192(temp[0], temp[1], 0x80);
  139. break;
  140. }
  141. case 32:
  142. {
  143. /* 256 bit key setup */
  144. rk[0] = _mm_loadu_si128((const __m128i*) cipherKey);
  145. rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16));
  146. rk[2] = KEYEXP256(rk[0], rk[1], 0x01);
  147. rk[3] = KEYEXP256_2(rk[1], rk[2]);
  148. rk[4] = KEYEXP256(rk[2], rk[3], 0x02);
  149. rk[5] = KEYEXP256_2(rk[3], rk[4]);
  150. rk[6] = KEYEXP256(rk[4], rk[5], 0x04);
  151. rk[7] = KEYEXP256_2(rk[5], rk[6]);
  152. rk[8] = KEYEXP256(rk[6], rk[7], 0x08);
  153. rk[9] = KEYEXP256_2(rk[7], rk[8]);
  154. rk[10] = KEYEXP256(rk[8], rk[9], 0x10);
  155. rk[11] = KEYEXP256_2(rk[9], rk[10]);
  156. rk[12] = KEYEXP256(rk[10], rk[11], 0x20);
  157. rk[13] = KEYEXP256_2(rk[11], rk[12]);
  158. rk[14] = KEYEXP256(rk[12], rk[13], 0x40);
  159. break;
  160. }
  161. }
  162. }
  163. /* Decryption key setup */
  164. static void aes_key_setup_dec(__m128i dk[], const __m128i ek[], int rounds)
  165. {
  166. int i;
  167. dk[rounds] = ek[0];
  168. for (i = 1; i < rounds; ++i) {
  169. dk[rounds - i] = _mm_aesimc_si128(ek[i]);
  170. }
  171. dk[0] = ek[rounds];
  172. }
  173. void block_init_aesni(block_state* self, unsigned char* key, int keylen)
  174. {
  175. int nr = 0;
  176. switch (keylen) {
  177. case 16: nr = 10; break;
  178. case 24: nr = 12; break;
  179. case 32: nr = 14; break;
  180. default:
  181. return;
  182. }
  183. /* ensure that self->ek and self->dk are aligned to 16 byte boundaries */
  184. void* tek = aligned_malloc_wrapper(16, (nr + 1) * sizeof(__m128i));
  185. void* tdk = aligned_malloc_wrapper(16, (nr + 1) * sizeof(__m128i));
  186. if (!tek || !tdk) {
  187. aligned_free_wrapper(tek);
  188. aligned_free_wrapper(tdk);
  189. return;
  190. }
  191. self->ek = tek;
  192. self->dk = tdk;
  193. self->rounds = nr;
  194. aes_key_setup_enc(self->ek, key, keylen);
  195. aes_key_setup_dec(self->dk, self->ek, nr);
  196. }
  197. void block_finalize_aesni(block_state* self)
  198. {
  199. /* overwrite contents of ek and dk */
  200. memset(self->ek, 0, (self->rounds + 1) * sizeof(__m128i));
  201. memset(self->dk, 0, (self->rounds + 1) * sizeof(__m128i));
  202. aligned_free_wrapper(self->ek);
  203. aligned_free_wrapper(self->dk);
  204. }
  205. void block_encrypt_aesni(block_state* self, const u8* in, u8* out)
  206. {
  207. __m128i m = _mm_loadu_si128((const __m128i*) in);
  208. /* first 9 rounds */
  209. m = _mm_xor_si128(m, self->ek[0]);
  210. m = _mm_aesenc_si128(m, self->ek[1]);
  211. m = _mm_aesenc_si128(m, self->ek[2]);
  212. m = _mm_aesenc_si128(m, self->ek[3]);
  213. m = _mm_aesenc_si128(m, self->ek[4]);
  214. m = _mm_aesenc_si128(m, self->ek[5]);
  215. m = _mm_aesenc_si128(m, self->ek[6]);
  216. m = _mm_aesenc_si128(m, self->ek[7]);
  217. m = _mm_aesenc_si128(m, self->ek[8]);
  218. m = _mm_aesenc_si128(m, self->ek[9]);
  219. if (self->rounds != 10) {
  220. /* two additional rounds for AES-192/256 */
  221. m = _mm_aesenc_si128(m, self->ek[10]);
  222. m = _mm_aesenc_si128(m, self->ek[11]);
  223. if (self->rounds == 14) {
  224. /* another two additional rounds for AES-256 */
  225. m = _mm_aesenc_si128(m, self->ek[12]);
  226. m = _mm_aesenc_si128(m, self->ek[13]);
  227. }
  228. }
  229. m = _mm_aesenclast_si128(m, self->ek[self->rounds]);
  230. _mm_storeu_si128((__m128i*) out, m);
  231. }
  232. void block_decrypt_aesni(block_state* self, const u8* in, u8* out)
  233. {
  234. __m128i m = _mm_loadu_si128((const __m128i*) in);
  235. /* first 9 rounds */
  236. m = _mm_xor_si128(m, self->dk[0]);
  237. m = _mm_aesdec_si128(m, self->dk[1]);
  238. m = _mm_aesdec_si128(m, self->dk[2]);
  239. m = _mm_aesdec_si128(m, self->dk[3]);
  240. m = _mm_aesdec_si128(m, self->dk[4]);
  241. m = _mm_aesdec_si128(m, self->dk[5]);
  242. m = _mm_aesdec_si128(m, self->dk[6]);
  243. m = _mm_aesdec_si128(m, self->dk[7]);
  244. m = _mm_aesdec_si128(m, self->dk[8]);
  245. m = _mm_aesdec_si128(m, self->dk[9]);
  246. if (self->rounds != 10) {
  247. /* two additional rounds for AES-192/256 */
  248. m = _mm_aesdec_si128(m, self->dk[10]);
  249. m = _mm_aesdec_si128(m, self->dk[11]);
  250. if (self->rounds == 14) {
  251. /* another two additional rounds for AES-256 */
  252. m = _mm_aesdec_si128(m, self->dk[12]);
  253. m = _mm_aesdec_si128(m, self->dk[13]);
  254. }
  255. }
  256. m = _mm_aesdeclast_si128(m, self->dk[self->rounds]);
  257. _mm_storeu_si128((__m128i*) out, m);
  258. }