author: madmaxoft <github@xoft.cz> 2014-01-22 22:26:40 +0100
committer: madmaxoft <github@xoft.cz> 2014-01-22 22:26:40 +0100
commit: 34f13d589a2ebbcae9230732c7a763b3cdd88b41 (patch)
tree: 4f7bad4f90ca8f7a896d83951804f0207082cafb /lib/cryptopp/rijndael.cpp
parent: Replacing CryptoPP with PolarSSL. (diff)
download: cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar
cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.gz
cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.bz2
cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.lz
cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.xz
cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.zst
cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.zip
1 files changed, 0 insertions, 1261 deletions
diff --git a/lib/cryptopp/rijndael.cpp b/lib/cryptopp/rijndael.cpp
deleted file mode 100644
index c185032cf..000000000
--- a/lib/cryptopp/rijndael.cpp
+++ /dev/null
@@ -1,1261 +0,0 @@
-// rijndael.cpp - modified by Chris Morgan <cmorgan@wpi.edu>
-// and Wei Dai from Paulo Baretto's Rijndael implementation
-// The original code and all modifications are in the public domain.
-
-// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
-
-/*
-July 2010: Added support for AES-NI instructions via compiler intrinsics.
-*/
-
-/*
-Feb 2009: The x86/x64 assembly code was rewritten in by Wei Dai to do counter mode 
-caching, which was invented by Hongjun Wu and popularized by Daniel J. Bernstein 
-and Peter Schwabe in their paper "New AES software speed records". The round 
-function was also modified to include a trick similar to one in Brian Gladman's 
-x86 assembly code, doing an 8-bit register move to minimize the number of 
-register spills. Also switched to compressed tables and copying round keys to 
-the stack.
-
-The C++ implementation now uses compressed tables if 
-CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined.
-*/
-
-/*
-July 2006: Defense against timing attacks was added in by Wei Dai.
-
-The code now uses smaller tables in the first and last rounds,
-and preloads them into L1 cache before usage (by loading at least 
-one element in each cache line). 
-
-We try to delay subsequent accesses to each table (used in the first 
-and last rounds) until all of the table has been preloaded. Hopefully
-the compiler isn't smart enough to optimize that code away.
-
-After preloading the table, we also try not to access any memory location
-other than the table and the stack, in order to prevent table entries from 
-being unloaded from L1 cache, until that round is finished.
-(Some popular CPUs have 2-way associative caches.)
-*/
-
-// This is the original introductory comment:
-
-/**
- * version 3.0 (December 2000)
- *
- * Optimised ANSI C code for the Rijndael cipher (now AES)
- *
- * author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
- * author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
- * author Paulo Barreto <paulo.barreto@terra.com.br>
- *
- * This code is hereby placed in the public domain.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "pch.h"
-
-#ifndef CRYPTOPP_IMPORTS
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#include "rijndael.h"
-#include "misc.h"
-#include "cpu.h"
-
-NAMESPACE_BEGIN(CryptoPP)
-
-#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
-namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
-using namespace rdtable;
-#else
-static word64 Te[256];
-#endif
-static word64 Td[256];
-#else
-static word32 Te[256*4], Td[256*4];
-#endif
-static volatile bool s_TeFilled = false, s_TdFilled = false;
-
-// ************************* Portable Code ************************************
-
-#define QUARTER_ROUND(L, T, t, a, b, c, d)	\
-	a ^= L(T, 3, byte(t)); t >>= 8;\
-	b ^= L(T, 2, byte(t)); t >>= 8;\
-	c ^= L(T, 1, byte(t)); t >>= 8;\
-	d ^= L(T, 0, t);
-
-#define QUARTER_ROUND_LE(t, a, b, c, d)	\
-	tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
-	tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
-	tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
-	tempBlock[d] = ((byte *)(Te+t))[1];
-
-#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-	#define QUARTER_ROUND_LD(t, a, b, c, d)	\
-		tempBlock[a] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
-		tempBlock[b] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
-		tempBlock[c] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
-		tempBlock[d] = ((byte *)(Td+t))[GetNativeByteOrder()*7];
-#else
-	#define QUARTER_ROUND_LD(t, a, b, c, d)	\
-		tempBlock[a] = Sd[byte(t)]; t >>= 8;\
-		tempBlock[b] = Sd[byte(t)]; t >>= 8;\
-		tempBlock[c] = Sd[byte(t)]; t >>= 8;\
-		tempBlock[d] = Sd[t];
-#endif
-
-#define QUARTER_ROUND_E(t, a, b, c, d)		QUARTER_ROUND(TL_M, Te, t, a, b, c, d)
-#define QUARTER_ROUND_D(t, a, b, c, d)		QUARTER_ROUND(TL_M, Td, t, a, b, c, d)
-
-#ifdef IS_LITTLE_ENDIAN
-	#define QUARTER_ROUND_FE(t, a, b, c, d)		QUARTER_ROUND(TL_F, Te, t, d, c, b, a)
-	#define QUARTER_ROUND_FD(t, a, b, c, d)		QUARTER_ROUND(TL_F, Td, t, d, c, b, a)
-	#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-		#define TL_F(T, i, x)	(*(word32 *)((byte *)T + x*8 + (6-i)%4+1))
-		#define TL_M(T, i, x)	(*(word32 *)((byte *)T + x*8 + (i+3)%4+1))
-	#else
-		#define TL_F(T, i, x)	rotrFixed(T[x], (3-i)*8)
-		#define TL_M(T, i, x)	T[i*256 + x]
-	#endif
-#else
-	#define QUARTER_ROUND_FE(t, a, b, c, d)		QUARTER_ROUND(TL_F, Te, t, a, b, c, d)
-	#define QUARTER_ROUND_FD(t, a, b, c, d)		QUARTER_ROUND(TL_F, Td, t, a, b, c, d)
-	#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-		#define TL_F(T, i, x)	(*(word32 *)((byte *)T + x*8 + (4-i)%4))
-		#define TL_M			TL_F
-	#else
-		#define TL_F(T, i, x)	rotrFixed(T[x], i*8)
-		#define TL_M(T, i, x)	T[i*256 + x]
-	#endif
-#endif
-
-
-#define f2(x)   ((x<<1)^(((x>>7)&1)*0x11b))
-#define f4(x)   ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
-#define f8(x)   ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
-
-#define f3(x)   (f2(x) ^ x)
-#define f9(x)   (f8(x) ^ x)
-#define fb(x)   (f8(x) ^ f2(x) ^ x)
-#define fd(x)   (f8(x) ^ f4(x) ^ x)
-#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
-
-void Rijndael::Base::FillEncTable()
-{
-	for (int i=0; i<256; i++)
-	{
-		byte x = Se[i];
-#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-		word32 y = word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
-		Te[i] = word64(y | f3(x))<<32 | y;
-#else
-		word32 y = f3(x) | word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
-		for (int j=0; j<4; j++)
-		{
-			Te[i+j*256] = y;
-			y = rotrFixed(y, 8);
-		}
-#endif
-	}
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
-	Te[256] = Te[257] = 0;
-#endif
-	s_TeFilled = true;
-}
-
-void Rijndael::Base::FillDecTable()
-{
-	for (int i=0; i<256; i++)
-	{
-		byte x = Sd[i];
-#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-		word32 y = word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
-		Td[i] = word64(y | fb(x))<<32 | y | x;
-#else
-		word32 y = fb(x) | word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;;
-		for (int j=0; j<4; j++)
-		{
-			Td[i+j*256] = y;
-			y = rotrFixed(y, 8);
-		}
-#endif
-	}
-	s_TdFilled = true;
-}
-
-void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
-{
-	AssertValidKeyLength(keylen);
-
-	m_rounds = keylen/4 + 6;
-	m_key.New(4*(m_rounds+1));
-
-	word32 *rk = m_key;
-
-#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86)
-	// MSVC 2008 SP1 generates bad code for _mm_extract_epi32() when compiling for X64
-	if (HasAESNI())
-	{
-		static const word32 rcLE[] = {
-			0x01, 0x02, 0x04, 0x08,
-			0x10, 0x20, 0x40, 0x80,
-			0x1B, 0x36, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
-		};
-		const word32 *rc = rcLE;
-
-		__m128i temp = _mm_loadu_si128((__m128i *)(userKey+keylen-16));
-		memcpy(rk, userKey, keylen);
-
-		while (true)
-		{
-			rk[keylen/4] = rk[0] ^ _mm_extract_epi32(_mm_aeskeygenassist_si128(temp, 0), 3) ^ *(rc++);
-			rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
-			rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
-			rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
-
-			if (rk + keylen/4 + 4 == m_key.end())
-				break;
-
-			if (keylen == 24)
-			{
-				rk[10] = rk[ 4] ^ rk[ 9];
-				rk[11] = rk[ 5] ^ rk[10];
-				temp = _mm_insert_epi32(temp, rk[11], 3);
-			}
-			else if (keylen == 32)
-			{
-				temp = _mm_insert_epi32(temp, rk[11], 3);
-    			rk[12] = rk[ 4] ^ _mm_extract_epi32(_mm_aeskeygenassist_si128(temp, 0), 2);
-    			rk[13] = rk[ 5] ^ rk[12];
-    			rk[14] = rk[ 6] ^ rk[13];
-    			rk[15] = rk[ 7] ^ rk[14];
-				temp = _mm_insert_epi32(temp, rk[15], 3);
-			}
-			else
-				temp = _mm_insert_epi32(temp, rk[7], 3);
-
-			rk += keylen/4;
-		}
-
-		if (!IsForwardTransformation())
-		{
-			rk = m_key;
-			unsigned int i, j;
-
-			std::swap(*(__m128i *)(rk), *(__m128i *)(rk+4*m_rounds));
-
-			for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
-			{
-				temp = _mm_aesimc_si128(*(__m128i *)(rk+i));
-				*(__m128i *)(rk+i) = _mm_aesimc_si128(*(__m128i *)(rk+j));
-				*(__m128i *)(rk+j) = temp;
-			}
-
-			*(__m128i *)(rk+i) = _mm_aesimc_si128(*(__m128i *)(rk+i));
-		}
-
-		return;
-	}
-#endif
-
-	GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);
-	const word32 *rc = rcon;
-	word32 temp;
-
-	while (true)
-	{
-		temp  = rk[keylen/4-1];
-		word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^ (word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
-		rk[keylen/4] = rk[0] ^ x ^ *(rc++);
-		rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
-		rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
-		rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
-
-		if (rk + keylen/4 + 4 == m_key.end())
-			break;
-
-		if (keylen == 24)
-		{
-			rk[10] = rk[ 4] ^ rk[ 9];
-			rk[11] = rk[ 5] ^ rk[10];
-		}
-		else if (keylen == 32)
-		{
-    		temp = rk[11];
-    		rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
-    		rk[13] = rk[ 5] ^ rk[12];
-    		rk[14] = rk[ 6] ^ rk[13];
-    		rk[15] = rk[ 7] ^ rk[14];
-		}
-		rk += keylen/4;
-	}
-
-	rk = m_key;
-
-	if (IsForwardTransformation())
-	{
-		if (!s_TeFilled)
-			FillEncTable();
-
-		ConditionalByteReverse(BIG_ENDIAN_ORDER, rk, rk, 16);
-		ConditionalByteReverse(BIG_ENDIAN_ORDER, rk + m_rounds*4, rk + m_rounds*4, 16);
-	}
-	else
-	{
-		if (!s_TdFilled)
-			FillDecTable();
-
-		unsigned int i, j;
-
-#define InverseMixColumn(x)		TL_M(Td, 0, Se[GETBYTE(x, 3)]) ^ TL_M(Td, 1, Se[GETBYTE(x, 2)]) ^ TL_M(Td, 2, Se[GETBYTE(x, 1)]) ^ TL_M(Td, 3, Se[GETBYTE(x, 0)])
-
-		for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
-		{
-			temp = InverseMixColumn(rk[i    ]); rk[i    ] = InverseMixColumn(rk[j    ]); rk[j    ] = temp;
-			temp = InverseMixColumn(rk[i + 1]); rk[i + 1] = InverseMixColumn(rk[j + 1]); rk[j + 1] = temp;
-			temp = InverseMixColumn(rk[i + 2]); rk[i + 2] = InverseMixColumn(rk[j + 2]); rk[j + 2] = temp;
-			temp = InverseMixColumn(rk[i + 3]); rk[i + 3] = InverseMixColumn(rk[j + 3]); rk[j + 3] = temp;
-		}
-
-		rk[i+0] = InverseMixColumn(rk[i+0]);
-		rk[i+1] = InverseMixColumn(rk[i+1]);
-		rk[i+2] = InverseMixColumn(rk[i+2]);
-		rk[i+3] = InverseMixColumn(rk[i+3]);
-
-		temp = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[0]); rk[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[4*m_rounds+0]); rk[4*m_rounds+0] = temp;
-		temp = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[1]); rk[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[4*m_rounds+1]); rk[4*m_rounds+1] = temp;
-		temp = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[2]); rk[2] = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[4*m_rounds+2]); rk[4*m_rounds+2] = temp;
-		temp = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[3]); rk[3] = ConditionalByteReverse(BIG_ENDIAN_ORDER, rk[4*m_rounds+3]); rk[4*m_rounds+3] = temp;
-	}
-
-#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-	if (HasAESNI())
-		ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
-#endif
-}
-
-void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
-	if (HasSSE2())
-#else
-	if (HasAESNI())
-#endif
-	{
-		Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
-		return;
-	}
-#endif
-
-	typedef BlockGetAndPut<word32, NativeByteOrder> Block;
-
-	word32 s0, s1, s2, s3, t0, t1, t2, t3;
-	Block::Get(inBlock)(s0)(s1)(s2)(s3);
-
-	const word32 *rk = m_key;
-	s0 ^= rk[0];
-	s1 ^= rk[1];
-	s2 ^= rk[2];
-	s3 ^= rk[3];
-	t0 = rk[4];
-	t1 = rk[5];
-	t2 = rk[6];
-	t3 = rk[7];
-	rk += 8;
-
-	// timing attack countermeasure. see comments at top for more details
-	const int cacheLineSize = GetCacheLineSize();
-	unsigned int i;
-	word32 u = 0;
-#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-	for (i=0; i<2048; i+=cacheLineSize)
-#else
-	for (i=0; i<1024; i+=cacheLineSize)
-#endif
-		u &= *(const word32 *)(((const byte *)Te)+i);
-	u &= Te[255];
-	s0 |= u; s1 |= u; s2 |= u; s3 |= u;
-
-	QUARTER_ROUND_FE(s3, t0, t1, t2, t3)
-	QUARTER_ROUND_FE(s2, t3, t0, t1, t2)
-	QUARTER_ROUND_FE(s1, t2, t3, t0, t1)
-	QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
-
-	// Nr - 2 full rounds:
-    unsigned int r = m_rounds/2 - 1;
-    do
-	{
-		s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
-
-		QUARTER_ROUND_E(t3, s0, s1, s2, s3)
-		QUARTER_ROUND_E(t2, s3, s0, s1, s2)
-		QUARTER_ROUND_E(t1, s2, s3, s0, s1)
-		QUARTER_ROUND_E(t0, s1, s2, s3, s0)
-
-		t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
-
-		QUARTER_ROUND_E(s3, t0, t1, t2, t3)
-		QUARTER_ROUND_E(s2, t3, t0, t1, t2)
-		QUARTER_ROUND_E(s1, t2, t3, t0, t1)
-		QUARTER_ROUND_E(s0, t1, t2, t3, t0)
-
-        rk += 8;
-    } while (--r);
-
-	word32 tbw[4];
-	byte *const tempBlock = (byte *)tbw;
-
-	QUARTER_ROUND_LE(t2, 15, 2, 5, 8)
-	QUARTER_ROUND_LE(t1, 11, 14, 1, 4)
-	QUARTER_ROUND_LE(t0, 7, 10, 13, 0)
-	QUARTER_ROUND_LE(t3, 3, 6, 9, 12)
-
-	Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
-}
-
-void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
-#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-	if (HasAESNI())
-	{
-		Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
-		return;
-	}
-#endif
-
-	typedef BlockGetAndPut<word32, NativeByteOrder> Block;
-
-	word32 s0, s1, s2, s3, t0, t1, t2, t3;
-	Block::Get(inBlock)(s0)(s1)(s2)(s3);
-
-	const word32 *rk = m_key;
-	s0 ^= rk[0];
-	s1 ^= rk[1];
-	s2 ^= rk[2];
-	s3 ^= rk[3];
-	t0 = rk[4];
-	t1 = rk[5];
-	t2 = rk[6];
-	t3 = rk[7];
-	rk += 8;
-
-	// timing attack countermeasure. see comments at top for more details
-	const int cacheLineSize = GetCacheLineSize();
-	unsigned int i;
-	word32 u = 0;
-#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-	for (i=0; i<2048; i+=cacheLineSize)
-#else
-	for (i=0; i<1024; i+=cacheLineSize)
-#endif
-		u &= *(const word32 *)(((const byte *)Td)+i);
-	u &= Td[255];
-	s0 |= u; s1 |= u; s2 |= u; s3 |= u;
-
-	QUARTER_ROUND_FD(s3, t2, t1, t0, t3)
-	QUARTER_ROUND_FD(s2, t1, t0, t3, t2)
-	QUARTER_ROUND_FD(s1, t0, t3, t2, t1)
-	QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
-
-	// Nr - 2 full rounds:
-    unsigned int r = m_rounds/2 - 1;
-    do
-	{
-		s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
-
-		QUARTER_ROUND_D(t3, s2, s1, s0, s3)
-		QUARTER_ROUND_D(t2, s1, s0, s3, s2)
-		QUARTER_ROUND_D(t1, s0, s3, s2, s1)
-		QUARTER_ROUND_D(t0, s3, s2, s1, s0)
-
-		t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
-
-		QUARTER_ROUND_D(s3, t2, t1, t0, t3)
-		QUARTER_ROUND_D(s2, t1, t0, t3, t2)
-		QUARTER_ROUND_D(s1, t0, t3, t2, t1)
-		QUARTER_ROUND_D(s0, t3, t2, t1, t0)
-
-        rk += 8;
-    } while (--r);
-
-#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
-	// timing attack countermeasure. see comments at top for more details
-	// If CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined, 
-	// QUARTER_ROUND_LD will use Td, which is already preloaded.
-	u = 0;
-	for (i=0; i<256; i+=cacheLineSize)
-		u &= *(const word32 *)(Sd+i);
-	u &= *(const word32 *)(Sd+252);
-	t0 |= u; t1 |= u; t2 |= u; t3 |= u;
-#endif
-
-	word32 tbw[4];
-	byte *const tempBlock = (byte *)tbw;
-
-	QUARTER_ROUND_LD(t2, 7, 2, 13, 8)
-	QUARTER_ROUND_LD(t1, 3, 14, 9, 4)
-	QUARTER_ROUND_LD(t0, 15, 10, 5, 0)
-	QUARTER_ROUND_LD(t3, 11, 6, 1, 12)
-
-	Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
-}
-
-// ************************* Assembly Code ************************************
-
-#pragma warning(disable: 4731)	// frame pointer register 'ebp' modified by inline assembly code
-
-#endif	// #ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-
-CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k)
-{
-#if CRYPTOPP_BOOL_X86
-
-#define L_REG			esp
-#define L_INDEX(i)		(L_REG+768+i)
-#define L_INXORBLOCKS	L_INBLOCKS+4
-#define L_OUTXORBLOCKS	L_INBLOCKS+8
-#define L_OUTBLOCKS		L_INBLOCKS+12
-#define L_INCREMENTS	L_INDEX(16*15)
-#define L_SP			L_INDEX(16*16)
-#define L_LENGTH		L_INDEX(16*16+4)
-#define L_KEYS_BEGIN	L_INDEX(16*16+8)
-
-#define MOVD			movd
-#define MM(i)			mm##i
-
-#define MXOR(a,b,c)	\
-	AS2(	movzx	esi, b)\
-	AS2(	movd	mm7, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
-	AS2(	pxor	MM(a), mm7)\
-
-#define MMOV(a,b,c)	\
-	AS2(	movzx	esi, b)\
-	AS2(	movd	MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
-
-#else
-
-#define L_REG			r8
-#define L_INDEX(i)		(L_REG+i)
-#define L_INXORBLOCKS	L_INBLOCKS+8
-#define L_OUTXORBLOCKS	L_INBLOCKS+16
-#define L_OUTBLOCKS		L_INBLOCKS+24
-#define L_INCREMENTS	L_INDEX(16*16)
-#define L_LENGTH		L_INDEX(16*18+8)
-#define L_KEYS_BEGIN	L_INDEX(16*19)
-
-#define MOVD			mov
-#define MM_0			r9d
-#define MM_1			r12d
-#ifdef __GNUC__
-#define MM_2			r11d
-#else
-#define MM_2			r10d
-#endif
-#define MM(i)			MM_##i
-
-#define MXOR(a,b,c)	\
-	AS2(	movzx	esi, b)\
-	AS2(	xor		MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
-
-#define MMOV(a,b,c)	\
-	AS2(	movzx	esi, b)\
-	AS2(	mov		MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
-
-#endif
-
-#define L_SUBKEYS		L_INDEX(0)
-#define L_SAVED_X		L_SUBKEYS
-#define L_KEY12			L_INDEX(16*12)
-#define L_LASTROUND		L_INDEX(16*13)
-#define L_INBLOCKS		L_INDEX(16*14)
-#define MAP0TO4(i)		(ASM_MOD(i+3,4)+1)
-
-#define XOR(a,b,c)	\
-	AS2(	movzx	esi, b)\
-	AS2(	xor		a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
-
-#define MOV(a,b,c)	\
-	AS2(	movzx	esi, b)\
-	AS2(	mov		a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-		ALIGN   8
-	Rijndael_Enc_AdvancedProcessBlocks	PROC FRAME
-		rex_push_reg rsi
-		push_reg rdi
-		push_reg rbx
-		push_reg r12
-		.endprolog
-		mov L_REG, rcx
-		mov AS_REG_7, ?Te@rdtable@CryptoPP@@3PA_KA
-		mov edi, DWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
-#elif defined(__GNUC__)
-	__asm__ __volatile__
-	(
-	".intel_syntax noprefix;"
-	#if CRYPTOPP_BOOL_X64
-	AS2(	mov		L_REG, rcx)
-	#endif
-	AS_PUSH_IF86(bx)
-	AS_PUSH_IF86(bp)
-	AS2(	mov		AS_REG_7, WORD_REG(si))
-#else
-	AS_PUSH_IF86(si)
-	AS_PUSH_IF86(di)
-	AS_PUSH_IF86(bx)
-	AS_PUSH_IF86(bp)
-	AS2(	lea		AS_REG_7, [Te])
-	AS2(	mov		edi, [g_cacheLineSize])
-#endif
-
-#if CRYPTOPP_BOOL_X86
-	AS2(	mov		[ecx+16*12+16*4], esp)	// save esp to L_SP
-	AS2(	lea		esp, [ecx-768])
-#endif
-
-	// copy subkeys to stack
-	AS2(	mov		WORD_REG(si), [L_KEYS_BEGIN])
-	AS2(	mov		WORD_REG(ax), 16)
-	AS2(	and		WORD_REG(ax), WORD_REG(si))
-	AS2(	movdqa	xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)])	// subkey 1 (non-counter) or 2 (counter)
-	AS2(	movdqa	[L_KEY12], xmm3)
-	AS2(	lea		WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
-	AS2(	sub		WORD_REG(ax), WORD_REG(si))
-	ASL(0)
-	AS2(	movdqa	xmm0, [WORD_REG(ax)+WORD_REG(si)])
-	AS2(	movdqa	XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0)
-	AS2(	add		WORD_REG(si), 16)
-	AS2(	cmp		WORD_REG(si), 16*12)
-	ASJ(	jl,		0, b)
-
-	// read subkeys 0, 1 and last
-	AS2(	movdqa	xmm4, [WORD_REG(ax)+WORD_REG(si)])	// last subkey
-	AS2(	movdqa	xmm1, [WORD_REG(dx)])			// subkey 0
-	AS2(	MOVD	MM(1), [WORD_REG(dx)+4*4])		// 0,1,2,3
-	AS2(	mov		ebx, [WORD_REG(dx)+5*4])		// 4,5,6,7
-	AS2(	mov		ecx, [WORD_REG(dx)+6*4])		// 8,9,10,11
-	AS2(	mov		edx, [WORD_REG(dx)+7*4])		// 12,13,14,15
-
-	// load table into cache
-	AS2(	xor		WORD_REG(ax), WORD_REG(ax))
-	ASL(9)
-	AS2(	mov		esi, [AS_REG_7+WORD_REG(ax)])
-	AS2(	add		WORD_REG(ax), WORD_REG(di))
-	AS2(	mov		esi, [AS_REG_7+WORD_REG(ax)])
-	AS2(	add		WORD_REG(ax), WORD_REG(di))
-	AS2(	mov		esi, [AS_REG_7+WORD_REG(ax)])
-	AS2(	add		WORD_REG(ax), WORD_REG(di))
-	AS2(	mov		esi, [AS_REG_7+WORD_REG(ax)])
-	AS2(	add		WORD_REG(ax), WORD_REG(di))
-	AS2(	cmp		WORD_REG(ax), 2048)
-	ASJ(	jl,		9, b)
-	AS1(	lfence)
-
-	AS2(	test	DWORD PTR [L_LENGTH], 1)
-	ASJ(	jz,		8, f)
-
-	// counter mode one-time setup
-	AS2(	mov		WORD_REG(si), [L_INBLOCKS])
-	AS2(	movdqu	xmm2, [WORD_REG(si)])	// counter
-	AS2(	pxor	xmm2, xmm1)
-	AS2(	psrldq	xmm1, 14)
-	AS2(	movd	eax, xmm1)
-	AS2(	mov		al, BYTE PTR [WORD_REG(si)+15])
-	AS2(	MOVD	MM(2), eax)
-#if CRYPTOPP_BOOL_X86
-	AS2(	mov		eax, 1)
-	AS2(	movd	mm3, eax)
-#endif
-
-	// partial first round, in: xmm2(15,14,13,12;11,10,9,8;7,6,5,4;3,2,1,0), out: mm1, ebx, ecx, edx
-	AS2(	movd	eax, xmm2)
-	AS2(	psrldq	xmm2, 4)
-	AS2(	movd	edi, xmm2)
-	AS2(	psrldq	xmm2, 4)
-		MXOR(		1, al, 0)		// 0
-		XOR(		edx, ah, 1)		// 1
-	AS2(	shr		eax, 16)
-		XOR(		ecx, al, 2)		// 2
-		XOR(		ebx, ah, 3)		// 3
-	AS2(	mov		eax, edi)
-	AS2(	movd	edi, xmm2)
-	AS2(	psrldq	xmm2, 4)
-		XOR(		ebx, al, 0)		// 4
-		MXOR(		1, ah, 1)		// 5
-	AS2(	shr		eax, 16)
-		XOR(		edx, al, 2)		// 6
-		XOR(		ecx, ah, 3)		// 7
-	AS2(	mov		eax, edi)
-	AS2(	movd	edi, xmm2)
-		XOR(		ecx, al, 0)		// 8
-		XOR(		ebx, ah, 1)		// 9
-	AS2(	shr		eax, 16)
-		MXOR(		1, al, 2)		// 10
-		XOR(		edx, ah, 3)		// 11
-	AS2(	mov		eax, edi)
-		XOR(		edx, al, 0)		// 12
-		XOR(		ecx, ah, 1)		// 13
-	AS2(	shr		eax, 16)
-		XOR(		ebx, al, 2)		// 14
-	AS2(	psrldq	xmm2, 3)
-
-	// partial second round, in: ebx(4,5,6,7), ecx(8,9,10,11), edx(12,13,14,15), out: eax, ebx, edi, mm0
-	AS2(	mov		eax, [L_KEY12+0*4])
-	AS2(	mov		edi, [L_KEY12+2*4])
-	AS2(	MOVD	MM(0), [L_KEY12+3*4])
-		MXOR(	0, cl, 3)	/* 11 */
-		XOR(	edi, bl, 3)	/* 7 */
-		MXOR(	0, bh, 2)	/* 6 */
-	AS2(	shr ebx, 16)	/* 4,5 */
-		XOR(	eax, bl, 1)	/* 5 */
-		MOV(	ebx, bh, 0)	/* 4 */
-	AS2(	xor		ebx, [L_KEY12+1*4])
-		XOR(	eax, ch, 2)	/* 10 */
-	AS2(	shr ecx, 16)	/* 8,9 */
-		XOR(	eax, dl, 3)	/* 15 */
-		XOR(	ebx, dh, 2)	/* 14 */
-	AS2(	shr edx, 16)	/* 12,13 */
-		XOR(	edi, ch, 0)	/* 8 */
-		XOR(	ebx, cl, 1)	/* 9 */
-		XOR(	edi, dl, 1)	/* 13 */
-		MXOR(	0, dh, 0)	/* 12 */
-
-	AS2(	movd	ecx, xmm2)
-	AS2(	MOVD	edx, MM(1))
-	AS2(	MOVD	[L_SAVED_X+3*4], MM(0))
-	AS2(	mov		[L_SAVED_X+0*4], eax)
-	AS2(	mov		[L_SAVED_X+1*4], ebx)
-	AS2(	mov		[L_SAVED_X+2*4], edi)
-	ASJ(	jmp,	5, f)
-
-	ASL(3)
-	// non-counter mode per-block setup
-	AS2(	MOVD	MM(1), [L_KEY12+0*4])	// 0,1,2,3
-	AS2(	mov		ebx, [L_KEY12+1*4])		// 4,5,6,7
-	AS2(	mov		ecx, [L_KEY12+2*4])		// 8,9,10,11
-	AS2(	mov		edx, [L_KEY12+3*4])		// 12,13,14,15
-	ASL(8)
-	AS2(	mov		WORD_REG(ax), [L_INBLOCKS])
-	AS2(	movdqu	xmm2, [WORD_REG(ax)])
-	AS2(	mov		WORD_REG(si), [L_INXORBLOCKS])
-	AS2(	movdqu	xmm5, [WORD_REG(si)])
-	AS2(	pxor	xmm2, xmm1)
-	AS2(	pxor	xmm2, xmm5)
-
-	// first round, in: xmm2(15,14,13,12;11,10,9,8;7,6,5,4;3,2,1,0), out: eax, ebx, ecx, edx
-	AS2(	movd	eax, xmm2)
-	AS2(	psrldq	xmm2, 4)
-	AS2(	movd	edi, xmm2)
-	AS2(	psrldq	xmm2, 4)
-		MXOR(		1, al, 0)		// 0
-		XOR(		edx, ah, 1)		// 1
-	AS2(	shr		eax, 16)
-		XOR(		ecx, al, 2)		// 2
-		XOR(		ebx, ah, 3)		// 3
-	AS2(	mov		eax, edi)
-	AS2(	movd	edi, xmm2)
-	AS2(	psrldq	xmm2, 4)
-		XOR(		ebx, al, 0)		// 4
-		MXOR(		1, ah, 1)		// 5
-	AS2(	shr		eax, 16)
-		XOR(		edx, al, 2)		// 6
-		XOR(		ecx, ah, 3)		// 7
-	AS2(	mov		eax, edi)
-	AS2(	movd	edi, xmm2)
-		XOR(		ecx, al, 0)		// 8
-		XOR(		ebx, ah, 1)		// 9
-	AS2(	shr		eax, 16)
-		MXOR(		1, al, 2)		// 10
-		XOR(		edx, ah, 3)		// 11
-	AS2(	mov		eax, edi)
-		XOR(		edx, al, 0)		// 12
-		XOR(		ecx, ah, 1)		// 13
-	AS2(	shr		eax, 16)
-		XOR(		ebx, al, 2)		// 14
-		MXOR(		1, ah, 3)		// 15
-	AS2(	MOVD	eax, MM(1))
-
-	AS2(	add		L_REG, [L_KEYS_BEGIN])
-	AS2(	add		L_REG, 4*16)
-	ASJ(	jmp,	2, f)
-
-	ASL(1)
-	// counter-mode per-block setup
-	AS2(	MOVD	ecx, MM(2))
-	AS2(	MOVD	edx, MM(1))
-	AS2(	mov		eax, [L_SAVED_X+0*4])
-	AS2(	mov		ebx, [L_SAVED_X+1*4])
-	AS2(	xor		cl, ch)
-	AS2(	and		WORD_REG(cx), 255)
-	ASL(5)
-#if CRYPTOPP_BOOL_X86
-	AS2(	paddb	MM(2), mm3)
-#else
-	AS2(	add		MM(2), 1)
-#endif
-	// remaining part of second round, in: edx(previous round),esi(keyed counter byte) eax,ebx,[L_SAVED_X+2*4],[L_SAVED_X+3*4], out: eax,ebx,ecx,edx
-	AS2(	xor		edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3])
-		XOR(		ebx, dl, 3)
-		MOV(		ecx, dh, 2)
-	AS2(	shr		edx, 16)
-	AS2(	xor		ecx, [L_SAVED_X+2*4])
-		XOR(		eax, dh, 0)
-		MOV(		edx, dl, 1)
-	AS2(	xor		edx, [L_SAVED_X+3*4])
-
-	AS2(	add		L_REG, [L_KEYS_BEGIN])
-	AS2(	add		L_REG, 3*16)
-	ASJ(	jmp,	4, f)
-
-// in: eax(0,1,2,3), ebx(4,5,6,7), ecx(8,9,10,11), edx(12,13,14,15)
-// out: eax, ebx, edi, mm0
-#define ROUND()		\
-		MXOR(	0, cl, 3)	/* 11 */\
-	AS2(	mov	cl, al)		/* 8,9,10,3 */\
-		XOR(	edi, ah, 2)	/* 2 */\
-	AS2(	shr eax, 16)	/* 0,1 */\
-		XOR(	edi, bl, 3)	/* 7 */\
-		MXOR(	0, bh, 2)	/* 6 */\
-	AS2(	shr ebx, 16)	/* 4,5 */\
-		MXOR(	0, al, 1)	/* 1 */\
-		MOV(	eax, ah, 0)	/* 0 */\
-		XOR(	eax, bl, 1)	/* 5 */\
-		MOV(	ebx, bh, 0)	/* 4 */\
-		XOR(	eax, ch, 2)	/* 10 */\
-		XOR(	ebx, cl, 3)	/* 3 */\
-	AS2(	shr ecx, 16)	/* 8,9 */\
-		XOR(	eax, dl, 3)	/* 15 */\
-		XOR(	ebx, dh, 2)	/* 14 */\
-	AS2(	shr edx, 16)	/* 12,13 */\
-		XOR(	edi, ch, 0)	/* 8 */\
-		XOR(	ebx, cl, 1)	/* 9 */\
-		XOR(	edi, dl, 1)	/* 13 */\
-		MXOR(	0, dh, 0)	/* 12 */\
-
-	ASL(2)	// 2-round loop
-	AS2(	MOVD	MM(0), [L_SUBKEYS-4*16+3*4])
-	AS2(	mov		edi, [L_SUBKEYS-4*16+2*4])
-	ROUND()
-	AS2(	mov		ecx, edi)
-	AS2(	xor		eax, [L_SUBKEYS-4*16+0*4])
-	AS2(	xor		ebx, [L_SUBKEYS-4*16+1*4])
-	AS2(	MOVD	edx, MM(0))
-
-	ASL(4)
-	AS2(	MOVD	MM(0), [L_SUBKEYS-4*16+7*4])
-	AS2(	mov		edi, [L_SUBKEYS-4*16+6*4])
-	ROUND()
-	AS2(	mov		ecx, edi)
-	AS2(	xor		eax, [L_SUBKEYS-4*16+4*4])
-	AS2(	xor		ebx, [L_SUBKEYS-4*16+5*4])
-	AS2(	MOVD	edx, MM(0))
-
-	AS2(	add		L_REG, 32)
-	AS2(	test	L_REG, 255)
-	ASJ(	jnz,	2, b)
-	AS2(	sub		L_REG, 16*16)
-
-#define LAST(a, b, c)												\
-	AS2(	movzx	esi, a											)\
-	AS2(	movzx	edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1]	)\
-	AS2(	movzx	esi, b											)\
-	AS2(	xor		edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0]	)\
-	AS2(	mov		WORD PTR [L_LASTROUND+c], di					)\
-
-	// last round
-	LAST(ch, dl, 2)
-	LAST(dh, al, 6)
-	AS2(	shr		edx, 16)
-	LAST(ah, bl, 10)
-	AS2(	shr		eax, 16)
-	LAST(bh, cl, 14)
-	AS2(	shr		ebx, 16)
-	LAST(dh, al, 12)
-	AS2(	shr		ecx, 16)
-	LAST(ah, bl, 0)
-	LAST(bh, cl, 4)
-	LAST(ch, dl, 8)
-
-	AS2(	mov		WORD_REG(ax), [L_OUTXORBLOCKS])
-	AS2(	mov		WORD_REG(bx), [L_OUTBLOCKS])
-
-	AS2(	mov		WORD_REG(cx), [L_LENGTH])
-	AS2(	sub		WORD_REG(cx), 16)
-
-	AS2(	movdqu	xmm2, [WORD_REG(ax)])
-	AS2(	pxor	xmm2, xmm4)
-
-#if CRYPTOPP_BOOL_X86
-	AS2(	movdqa	xmm0, [L_INCREMENTS])
-	AS2(	paddd	xmm0, [L_INBLOCKS])
-	AS2(	movdqa	[L_INBLOCKS], xmm0)
-#else
-	AS2(	movdqa	xmm0, [L_INCREMENTS+16])
-	AS2(	paddq	xmm0, [L_INBLOCKS+16])
-	AS2(	movdqa	[L_INBLOCKS+16], xmm0)
-#endif
-
-	AS2(	pxor	xmm2, [L_LASTROUND])
-	AS2(	movdqu	[WORD_REG(bx)], xmm2)
-
-	ASJ(	jle,	7, f)
-	AS2(	mov		[L_LENGTH], WORD_REG(cx))
-	AS2(	test	WORD_REG(cx), 1)
-	ASJ(	jnz,	1, b)
-#if CRYPTOPP_BOOL_X64
-	AS2(	movdqa	xmm0, [L_INCREMENTS])
-	AS2(	paddq	xmm0, [L_INBLOCKS])
-	AS2(	movdqa	[L_INBLOCKS], xmm0)
-#endif
-	ASJ(	jmp,	3, b)
-
-	ASL(7)
-	// erase keys on stack
-	AS2(	xorps	xmm0, xmm0)
-	AS2(	lea		WORD_REG(ax), [L_SUBKEYS+7*16])
-	AS2(	movaps	[WORD_REG(ax)-7*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)-6*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)-5*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)-4*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)-3*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)-2*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)-1*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+0*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+1*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+2*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+3*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+4*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+5*16], xmm0)
-	AS2(	movaps	[WORD_REG(ax)+6*16], xmm0)
-#if CRYPTOPP_BOOL_X86
-	AS2(	mov		esp, [L_SP])
-	AS1(	emms)
-#endif
-	AS_POP_IF86(bp)
-	AS_POP_IF86(bx)
-#if defined(_MSC_VER) && CRYPTOPP_BOOL_X86
-	AS_POP_IF86(di)
-	AS_POP_IF86(si)
-	AS1(ret)
-#endif
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-	pop r12
-	pop rbx
-	pop rdi
-	pop rsi
-	ret
-	Rijndael_Enc_AdvancedProcessBlocks ENDP
-#endif
-#ifdef __GNUC__
-	".att_syntax prefix;"
-	: 
-	: "c" (locals), "d" (k), "S" (Te), "D" (g_cacheLineSize)
-	: "memory", "cc", "%eax"
-	#if CRYPTOPP_BOOL_X64
-		, "%rbx", "%r8", "%r9", "%r10", "%r11", "%r12"
-	#endif
-	);
-#endif
-}
-
-#endif
-
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
-extern "C" {
-void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k);
-}
-#endif
-
-#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86
-
-static inline bool AliasedWithTable(const byte *begin, const byte *end)
-{
-	size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096;
-	size_t t0 = size_t(Te)%4096, t1 = (size_t(Te)+sizeof(Te))%4096;
-	if (t1 > t0)
-		return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
-	else
-		return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);
-}
-
-#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-
-inline void AESNI_Enc_Block(__m128i &block, const __m128i *subkeys, unsigned int rounds)
-{
-	block = _mm_xor_si128(block, subkeys[0]);
-	for (unsigned int i=1; i<rounds-1; i+=2)
-	{
-		block = _mm_aesenc_si128(block, subkeys[i]);
-		block = _mm_aesenc_si128(block, subkeys[i+1]);
-	}
-	block = _mm_aesenc_si128(block, subkeys[rounds-1]);
-	block = _mm_aesenclast_si128(block, subkeys[rounds]);
-}
-
-inline void AESNI_Enc_4_Blocks(__m128i &block0, __m128i &block1, __m128i &block2, __m128i &block3, const __m128i *subkeys, unsigned int rounds)
-{
-	__m128i rk = subkeys[0];
-	block0 = _mm_xor_si128(block0, rk);
-	block1 = _mm_xor_si128(block1, rk);
-	block2 = _mm_xor_si128(block2, rk);
-	block3 = _mm_xor_si128(block3, rk);
-	for (unsigned int i=1; i<rounds; i++)
-	{
-		rk = subkeys[i];
-		block0 = _mm_aesenc_si128(block0, rk);
-		block1 = _mm_aesenc_si128(block1, rk);
-		block2 = _mm_aesenc_si128(block2, rk);
-		block3 = _mm_aesenc_si128(block3, rk);
-	}
-	rk = subkeys[rounds];
-	block0 = _mm_aesenclast_si128(block0, rk);
-	block1 = _mm_aesenclast_si128(block1, rk);
-	block2 = _mm_aesenclast_si128(block2, rk);
-	block3 = _mm_aesenclast_si128(block3, rk);
-}
-
-inline void AESNI_Dec_Block(__m128i &block, const __m128i *subkeys, unsigned int rounds)
-{
-	block = _mm_xor_si128(block, subkeys[0]);
-	for (unsigned int i=1; i<rounds-1; i+=2)
-	{
-		block = _mm_aesdec_si128(block, subkeys[i]);
-		block = _mm_aesdec_si128(block, subkeys[i+1]);
-	}
-	block = _mm_aesdec_si128(block, subkeys[rounds-1]);
-	block = _mm_aesdeclast_si128(block, subkeys[rounds]);
-}
-
-inline void AESNI_Dec_4_Blocks(__m128i &block0, __m128i &block1, __m128i &block2, __m128i &block3, const __m128i *subkeys, unsigned int rounds)
-{
-	__m128i rk = subkeys[0];
-	block0 = _mm_xor_si128(block0, rk);
-	block1 = _mm_xor_si128(block1, rk);
-	block2 = _mm_xor_si128(block2, rk);
-	block3 = _mm_xor_si128(block3, rk);
-	for (unsigned int i=1; i<rounds; i++)
-	{
-		rk = subkeys[i];
-		block0 = _mm_aesdec_si128(block0, rk);
-		block1 = _mm_aesdec_si128(block1, rk);
-		block2 = _mm_aesdec_si128(block2, rk);
-		block3 = _mm_aesdec_si128(block3, rk);
-	}
-	rk = subkeys[rounds];
-	block0 = _mm_aesdeclast_si128(block0, rk);
-	block1 = _mm_aesdeclast_si128(block1, rk);
-	block2 = _mm_aesdeclast_si128(block2, rk);
-	block3 = _mm_aesdeclast_si128(block3, rk);
-}
-
-static CRYPTOPP_ALIGN_DATA(16) const word32 s_one[] = {0, 0, 0, 1<<24};
-
-template <typename F1, typename F4>
-inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, const __m128i *subkeys, unsigned int rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
-{
-	size_t blockSize = 16;
-	size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize;
-	size_t xorIncrement = xorBlocks ? blockSize : 0;
-	size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize;
-
-	if (flags & BlockTransformation::BT_ReverseDirection)
-	{
-		assert(length % blockSize == 0);
-		inBlocks += length - blockSize;
-		xorBlocks += length - blockSize;
-		outBlocks += length - blockSize;
-		inIncrement = 0-inIncrement;
-		xorIncrement = 0-xorIncrement;
-		outIncrement = 0-outIncrement;
-	}
-
-	if (flags & BlockTransformation::BT_AllowParallel)
-	{
-		while (length >= 4*blockSize)
-		{
-			__m128i block0 = _mm_loadu_si128((const __m128i *)inBlocks), block1, block2, block3;
-			if (flags & BlockTransformation::BT_InBlockIsCounter)
-			{
-				const __m128i be1 = *(const __m128i *)s_one;
-				block1 = _mm_add_epi32(block0, be1);
-				block2 = _mm_add_epi32(block1, be1);
-				block3 = _mm_add_epi32(block2, be1);
-				_mm_storeu_si128((__m128i *)inBlocks, _mm_add_epi32(block3, be1));
-			}
-			else
-			{
-				inBlocks += inIncrement;
-				block1 = _mm_loadu_si128((const __m128i *)inBlocks);
-				inBlocks += inIncrement;
-				block2 = _mm_loadu_si128((const __m128i *)inBlocks);
-				inBlocks += inIncrement;
-				block3 = _mm_loadu_si128((const __m128i *)inBlocks);
-				inBlocks += inIncrement;
-			}
-
-			if (flags & BlockTransformation::BT_XorInput)
-			{
-				block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-				block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-				block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-				block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-			}
-
-			func4(block0, block1, block2, block3, subkeys, rounds);
-
-			if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
-			{
-				block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-				block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-				block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-				block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)xorBlocks));
-				xorBlocks += xorIncrement;
-			}
-
-			_mm_storeu_si128((__m128i *)outBlocks, block0);
-			outBlocks += outIncrement;
-			_mm_storeu_si128((__m128i *)outBlocks, block1);
-			outBlocks += outIncrement;
-			_mm_storeu_si128((__m128i *)outBlocks, block2);
-			outBlocks += outIncrement;
-			_mm_storeu_si128((__m128i *)outBlocks, block3);
-			outBlocks += outIncrement;
-
-			length -= 4*blockSize;
-		}
-	}
-
-	while (length >= blockSize)
-	{
-		__m128i block = _mm_loadu_si128((const __m128i *)inBlocks);
-
-		if (flags & BlockTransformation::BT_XorInput)
-			block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)xorBlocks));
-
-		if (flags & BlockTransformation::BT_InBlockIsCounter)
-			const_cast<byte *>(inBlocks)[15]++;
-
-		func1(block, subkeys, rounds);
-
-		if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
-			block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)xorBlocks));
-			
-		_mm_storeu_si128((__m128i *)outBlocks, block);
-
-		inBlocks += inIncrement;
-		outBlocks += outIncrement;
-		xorBlocks += xorIncrement;
-		length -= blockSize;
-	}
-
-	return length;
-}
-#endif
-
-size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
-{
-#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-	if (HasAESNI())
-		return AESNI_AdvancedProcessBlocks(AESNI_Enc_Block, AESNI_Enc_4_Blocks, (const __m128i *)m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
-#endif
-	
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
-	if (HasSSE2())
-	{
-		if (length < BLOCKSIZE)
-			return length;
-
-		struct Locals
-		{
-			word32 subkeys[4*12], workspace[8];
-			const byte *inBlocks, *inXorBlocks, *outXorBlocks;
-			byte *outBlocks;
-			size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement;
-			size_t regSpill, lengthAndCounterFlag, keysBegin;
-		};
-
-		size_t increment = BLOCKSIZE;
-		const byte* zeros = (byte *)(Te+256);
-		byte *space;
-
-		do {
-			space = (byte *)alloca(255+sizeof(Locals));
-			space += (256-(size_t)space%256)%256;
-		}
-		while (AliasedWithTable(space, space+sizeof(Locals)));
-
-		if (flags & BT_ReverseDirection)
-		{
-			assert(length % BLOCKSIZE == 0);
-			inBlocks += length - BLOCKSIZE;
-			xorBlocks += length - BLOCKSIZE;
-			outBlocks += length - BLOCKSIZE;
-			increment = 0-increment;
-		}
-
-		Locals &locals = *(Locals *)space;
-
-		locals.inBlocks = inBlocks;
-		locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros;
-		locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
-		locals.outBlocks = outBlocks;
-
-		locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
-		locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0;
-		locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment;
-		locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
-
-		locals.lengthAndCounterFlag = length - (length%16) - bool(flags & BT_InBlockIsCounter);
-		int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
-		locals.keysBegin = (12-keysToCopy)*16;
-
-		Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
-		return length % BLOCKSIZE;
-	}
-#endif
-
-	return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
-}
-
-#endif
-
-#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-
-size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
-{
-	if (HasAESNI())
-		return AESNI_AdvancedProcessBlocks(AESNI_Dec_Block, AESNI_Dec_4_Blocks, (const __m128i *)m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
-	
-	return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
-}
-
-#endif	// #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
-
-NAMESPACE_END
-
-#endif
-#endif
author	madmaxoft <github@xoft.cz>	2014-01-22 22:26:40 +0100
committer	madmaxoft <github@xoft.cz>	2014-01-22 22:26:40 +0100
commit	34f13d589a2ebbcae9230732c7a763b3cdd88b41 (patch)
tree	4f7bad4f90ca8f7a896d83951804f0207082cafb /lib/cryptopp/rijndael.cpp
parent	Replacing CryptoPP with PolarSSL. (diff)
download	cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.gz cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.bz2 cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.lz cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.xz cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.tar.zst cuberite-34f13d589a2ebbcae9230732c7a763b3cdd88b41.zip