mirror of
				https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
				synced 2025-10-31 12:39:23 +00:00 
			
		
		
		
	 e6efaa0253
			
		
	
	
		e6efaa0253
		
	
	
	
	
		
			
			The original implementation of aesni_cbc_dec does not save the IV if input length % 4 == 0. This makes decryption of the next block fail. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
		
			
				
	
	
		
			898 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			898 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * Implement AES algorithm in Intel AES-NI instructions.
 | |
|  *
 | |
|  * The white paper of AES-NI instructions can be downloaded from:
 | |
|  *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 | |
|  *
 | |
|  * Copyright (C) 2008, Intel Corp.
 | |
|  *    Author: Huang Ying <ying.huang@intel.com>
 | |
|  *            Vinodh Gopal <vinodh.gopal@intel.com>
 | |
|  *            Kahraman Akdemir
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License as published by
 | |
|  * the Free Software Foundation; either version 2 of the License, or
 | |
|  * (at your option) any later version.
 | |
|  */
 | |
| 
 | |
| #include <linux/linkage.h>
 | |
| 
 | |
.text

/*
 * Register aliases shared by the routines in this file.
 */

/* SSE registers holding the block(s) being encrypted or decrypted. */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1		/* name used by the single-block routines */

/* SSE registers holding copies of the input blocks. */
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1		/* name used by the single-block loops */

#define KEY	%xmm2		/* round key currently being applied */
#define IV	%xmm3		/* chaining value kept by aesni_cbc_dec */

/* Arguments of the exported routines, in argument order. */
#define KEYP	%rdi		/* key context pointer */
#define OUTP	%rsi		/* destination buffer */
#define INP	%rdx		/* source buffer */
#define LEN	%rcx		/* length in bytes */
#define IVP	%r8		/* IV buffer (CBC routines) */

/* Scratch registers. */
#define KLEN	%r9d		/* key length loaded from the key context */
#define T1	%r10
#define TKEYP	T1		/* cursor into the round key array */
#define T2	%r11		/* not referenced by the code visible here */
 | |
| 
 | |
/*
 * _key_expansion_128 / _key_expansion_256a:	internal ABI
 * input:
 *	%xmm0:	round key to derive the next one from
 *	%xmm1:	aeskeygenassist result for this step
 *	%xmm4:	low dword must be zero (it is still zero on return)
 *	%rcx:	address the new round key is stored to (16-byte aligned)
 * output:
 *	%xmm0:	new round key, also written to (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_128:
_key_expansion_256a:
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0		# each dword is now XORed with all lower dwords
	pshufd $0b11111111, %xmm1, %xmm1	# broadcast dword 3 of the keygenassist result
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret
 | |
| 
 | |
/*
 * _key_expansion_192a:	internal ABI
 * input:
 *	%xmm0:	first four dwords of the current 192-bit key state
 *	%xmm2:	remaining two dwords of the key state in its low qword
 *	%xmm1:	aeskeygenassist result computed from %xmm2
 *	%xmm4:	low dword must be zero (it is still zero on return)
 *	%rcx:	address the round keys are stored to (16-byte aligned)
 * output:
 *	%xmm0, %xmm2:	updated key state
 *	(%rcx):		low qword of the old %xmm2, then low qword of the new %xmm0
 *	16(%rcx):	high qword of the new %xmm0, then low qword of the new %xmm2
 *	%rcx:		advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
_key_expansion_192a:
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0		# each dword is now XORed with all lower dwords
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of the keygenassist result
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm6		# keep the old tail for the first store
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5		# old tail shifted up by one dword
	pshufd $0b11111111, %xmm0, %xmm3	# broadcast dword 3 of the new %xmm0
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (%rcx)
	movaps %xmm0, %xmm1
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 16(%rcx)
	add $0x20, %rcx
	ret
 | |
| 
 | |
/*
 * _key_expansion_192b:	internal ABI
 * input:
 *	%xmm0:	first four dwords of the current 192-bit key state
 *	%xmm2:	remaining two dwords of the key state in its low qword
 *	%xmm1:	aeskeygenassist result computed from %xmm2
 *	%xmm4:	low dword must be zero (it is still zero on return)
 *	%rcx:	address the round key is stored to (16-byte aligned)
 * output:
 *	%xmm0, %xmm2:	updated key state
 *	(%rcx):		the new %xmm0
 *	%rcx:		advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
_key_expansion_192b:
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0		# each dword is now XORed with all lower dwords
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of the keygenassist result
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5		# old tail shifted up by one dword
	pshufd $0b11111111, %xmm0, %xmm3	# broadcast dword 3 of the new %xmm0
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret
 | |
| 
 | |
/*
 * _key_expansion_256b:	internal ABI
 * input:
 *	%xmm2:	round key to derive the next one from
 *	%xmm1:	aeskeygenassist result for this step
 *	%xmm4:	low dword must be zero (it is still zero on return)
 *	%rcx:	address the new round key is stored to (16-byte aligned)
 * output:
 *	%xmm2:	new round key, also written to (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_256b:
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2		# each dword is now XORed with all lower dwords
	pshufd $0b10101010, %xmm1, %xmm1	# broadcast dword 2 of the keygenassist result
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	ret
 | |
| 
 | |
/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Expand the user key into the context (ctx in %rdi, in_key in %rsi,
 * key_len in %edx):
 *	ctx + 0:	encryption round keys
 *	ctx + 240:	decryption round keys
 *	ctx + 480:	key_len as passed in
 * Only the low byte of key_len is compared: below 24 selects the 128-bit
 * schedule, 24 the 192-bit schedule, anything above the 256-bit schedule.
 * ctx is accessed with movaps, so it has to be 16-byte aligned.
 * Always returns 0.
 *
 * The AES-NI instructions are emitted as .byte sequences; the mnemonic is
 * given in the comment next to each of them.
 */
ENTRY(aesni_set_key)
	movl %edx, 480(%rdi)		# remember the key length
	movups (%rsi), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (%rdi)		# ... is round key 0
	lea 0x10(%rdi), %rcx		# where the next round key goes
	pxor %xmm4, %xmm4		# the _key_expansion_* helpers expect 0 here
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192

	/* 256-bit key: the second half of the user key is round key 1. */
	movups 0x10(%rsi), %xmm2	# other user key
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	.irp rcon, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, \rcon	# aeskeygenassist $rcon, %xmm2, %xmm1
	call _key_expansion_256a
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, \rcon	# aeskeygenassist $rcon, %xmm0, %xmm1
	call _key_expansion_256b
	.endr
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40	# aeskeygenassist $0x40, %xmm2, %xmm1
	call _key_expansion_256a
	jmp .Ldec_key

.Lenc_key192:
	movq 0x10(%rsi), %xmm2		# other user key (8 bytes)
	/* Steps alternate between 192a and 192b; the constant doubles each step. */
	.irp rcon, 0x01, 0x04, 0x10, 0x40
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, \rcon	# aeskeygenassist $rcon, %xmm2, %xmm1
	call _key_expansion_192a
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, \rcon << 1	# aeskeygenassist $(2 * rcon), %xmm2, %xmm1
	call _key_expansion_192b
	.endr
	jmp .Ldec_key

.Lenc_key128:
	.irp rcon, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, \rcon	# aeskeygenassist $rcon, %xmm0, %xmm1
	call _key_expansion_128
	.endr

	/*
	 * Build the decryption schedule at ctx + 240: the encryption round
	 * keys in reverse order, with aesimc applied to all but the first
	 * and the last one.
	 */
.Ldec_key:
	sub $0x10, %rcx			# %rcx = last encryption round key
	movaps (%rdi), %xmm0
	movaps (%rcx), %xmm1
	movaps %xmm0, 240(%rcx)		# first enc key becomes last dec key
	movaps %xmm1, 240(%rdi)		# last enc key becomes first dec key
	add $0x10, %rdi			# %rdi walks the enc keys upwards
	lea 240-16(%rcx), %rsi		# %rsi walks the dec keys downwards
.align 4
.Ldec_key_loop:
	movaps (%rdi), %xmm0
	.byte 0x66, 0x0f, 0x38, 0xdb, 0xc8	# aesimc %xmm0, %xmm1
	movaps %xmm1, (%rsi)
	add $0x10, %rdi
	sub $0x10, %rsi
	cmp %rcx, %rdi
	jb .Ldec_key_loop
	xor %eax, %eax			# return 0
	ret
 | |
| 
 | |
/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypt the single 16-byte block at src into dst. Neither buffer has to
 * be aligned (both are accessed with movups).
 */
ENTRY(aesni_enc)
	movups (INP), STATE		# input
	movl 480(KEYP), KLEN		# key length
	call _aesni_enc1
	movups STATE, (OUTP)		# output
	ret
 | |
| 
 | |
/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is positioned so that the last round key is always at 0x70(TKEYP).
 * Longer keys enter the shared tail earlier and so run more rounds.
 */
_aesni_enc1:
	mov KEYP, TKEYP
	movaps (KEYP), KEY		# round key 0
	add $0x30, TKEYP
	pxor KEY, STATE			# round 0
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea leaves the flags of the cmp intact
	je .Lenc192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	movaps -0x60(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps -0x50(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
.align 4
.Lenc192:
	/* two extra rounds compared to the shortest key */
	movaps -0x40(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps -0x30(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
.align 4
.Lenc128:
	/* nine rounds common to all key lengths, then the last round */
	movaps -0x20(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps -0x10(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps (TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x10(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x20(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x30(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x40(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x50(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x60(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE
	movaps 0x70(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2	# aesenclast KEY, STATE (last round)
	ret
 | |
| 
 | |
/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Four-block variant of _aesni_enc1: every round key is loaded once and
 * applied to all four states.
 */

/* One aesenc round with KEY on each of the four states. */
.macro ENC4_ROUND
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2	# aesenc KEY, STATE1
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2	# aesenc KEY, STATE2
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea	# aesenc KEY, STATE3
	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2	# aesenc KEY, STATE4
.endm

/* The final aesenclast round with KEY on each of the four states. */
.macro ENC4_LAST
	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2	# aesenclast KEY, STATE1
	.byte 0x66, 0x0f, 0x38, 0xdd, 0xe2	# aesenclast KEY, STATE2
	.byte 0x66, 0x0f, 0x38, 0xdd, 0xea	# aesenclast KEY, STATE3
	.byte 0x66, 0x0f, 0x38, 0xdd, 0xf2	# aesenclast KEY, STATE4
.endm

_aesni_enc4:
	mov KEYP, TKEYP
	movaps (KEYP), KEY		# round key 0
	add $0x30, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea leaves the flags of the cmp intact
	je .L4enc192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	movaps -0x60(TKEYP), KEY
	ENC4_ROUND
	movaps -0x50(TKEYP), KEY
	ENC4_ROUND
.L4enc192:
	/* two extra rounds compared to the shortest key */
	movaps -0x40(TKEYP), KEY
	ENC4_ROUND
	movaps -0x30(TKEYP), KEY
	ENC4_ROUND
.L4enc128:
	/* nine rounds common to all key lengths, then the last round */
	movaps -0x20(TKEYP), KEY
	ENC4_ROUND
	movaps -0x10(TKEYP), KEY
	ENC4_ROUND
	movaps (TKEYP), KEY
	ENC4_ROUND
	movaps 0x10(TKEYP), KEY
	ENC4_ROUND
	movaps 0x20(TKEYP), KEY
	ENC4_ROUND
	movaps 0x30(TKEYP), KEY
	ENC4_ROUND
	movaps 0x40(TKEYP), KEY
	ENC4_ROUND
	movaps 0x50(TKEYP), KEY
	ENC4_ROUND
	movaps 0x60(TKEYP), KEY
	ENC4_ROUND
	movaps 0x70(TKEYP), KEY
	ENC4_LAST
	ret
 | |
| 
 | |
/*
 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypt the single 16-byte block at src into dst. Neither buffer has to
 * be aligned (both are accessed with movups).
 */
ENTRY(aesni_dec)
	movups (INP), STATE		# input
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP			# decryption round keys start at ctx + 240
	call _aesni_dec1
	movups STATE, (OUTP)		# output
	ret
 | |
| 
 | |
/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		pointer to the first decryption round key
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is positioned so that the last round key is always at 0x70(TKEYP).
 * Longer keys enter the shared tail earlier and so run more rounds.
 */
_aesni_dec1:
	mov KEYP, TKEYP
	movaps (KEYP), KEY		# round key 0
	add $0x30, TKEYP
	pxor KEY, STATE			# round 0
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea leaves the flags of the cmp intact
	je .Ldec192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	movaps -0x60(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps -0x50(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
.align 4
.Ldec192:
	/* two extra rounds compared to the shortest key */
	movaps -0x40(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps -0x30(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
.align 4
.Ldec128:
	/* nine rounds common to all key lengths, then the last round */
	movaps -0x20(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps -0x10(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps (TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x10(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x20(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x30(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x40(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x50(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x60(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE
	movaps 0x70(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2	# aesdeclast KEY, STATE (last round)
	ret
 | |
| 
 | |
/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		pointer to the first decryption round key
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Four-block variant of _aesni_dec1: every round key is loaded once and
 * applied to all four states.
 */

/* One aesdec round with KEY on each of the four states. */
.macro DEC4_ROUND
	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2	# aesdec KEY, STATE1
	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2	# aesdec KEY, STATE2
	.byte 0x66, 0x0f, 0x38, 0xde, 0xea	# aesdec KEY, STATE3
	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2	# aesdec KEY, STATE4
.endm

/* The final aesdeclast round with KEY on each of the four states. */
.macro DEC4_LAST
	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2	# aesdeclast KEY, STATE1
	.byte 0x66, 0x0f, 0x38, 0xdf, 0xe2	# aesdeclast KEY, STATE2
	.byte 0x66, 0x0f, 0x38, 0xdf, 0xea	# aesdeclast KEY, STATE3
	.byte 0x66, 0x0f, 0x38, 0xdf, 0xf2	# aesdeclast KEY, STATE4
.endm

_aesni_dec4:
	mov KEYP, TKEYP
	movaps (KEYP), KEY		# round key 0
	add $0x30, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea leaves the flags of the cmp intact
	je .L4dec192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	movaps -0x60(TKEYP), KEY
	DEC4_ROUND
	movaps -0x50(TKEYP), KEY
	DEC4_ROUND
.align 4
.L4dec192:
	/* two extra rounds compared to the shortest key */
	movaps -0x40(TKEYP), KEY
	DEC4_ROUND
	movaps -0x30(TKEYP), KEY
	DEC4_ROUND
.align 4
.L4dec128:
	/* nine rounds common to all key lengths, then the last round */
	movaps -0x20(TKEYP), KEY
	DEC4_ROUND
	movaps -0x10(TKEYP), KEY
	DEC4_ROUND
	movaps (TKEYP), KEY
	DEC4_ROUND
	movaps 0x10(TKEYP), KEY
	DEC4_ROUND
	movaps 0x20(TKEYP), KEY
	DEC4_ROUND
	movaps 0x30(TKEYP), KEY
	DEC4_ROUND
	movaps 0x40(TKEYP), KEY
	DEC4_ROUND
	movaps 0x50(TKEYP), KEY
	DEC4_ROUND
	movaps 0x60(TKEYP), KEY
	DEC4_ROUND
	movaps 0x70(TKEYP), KEY
	DEC4_LAST
	ret
 | |
| 
 | |
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypt every complete 16-byte block of src into dst. Blocks are
 * processed four at a time while at least 64 bytes remain, then one at a
 * time. A trailing partial block (len % 16 bytes) is left untouched.
 *
 * len is unsigned, so all length checks use unsigned conditions (jb/jae).
 */
ENTRY(aesni_ecb_enc)
	cmp $16, LEN
	jb .Lecb_enc_ret		# less than one block (including len == 0)
	mov 480(KEYP), KLEN		# key length
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# at least four more blocks
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# at least one more block
.Lecb_enc_ret:
	ret
 | |
| 
 | |
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypt every complete 16-byte block of src into dst. Blocks are
 * processed four at a time while at least 64 bytes remain, then one at a
 * time. A trailing partial block (len % 16 bytes) is left untouched.
 *
 * len is unsigned, so all length checks use unsigned conditions (jb/jae).
 */
ENTRY(aesni_ecb_dec)
	cmp $16, LEN
	jb .Lecb_dec_ret		# less than one block (including len == 0)
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP			# decryption round keys start at ctx + 240
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# at least four more blocks
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# at least one more block
.Lecb_dec_ret:
	ret
 | |
| 
 | |
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypt every complete 16-byte block of src into dst. The chaining
 * value is read from iv on entry, and the last ciphertext block is written
 * back to iv on exit. When len is below 16 nothing is done and iv is not
 * touched. A trailing partial block (len % 16 bytes) is ignored.
 *
 * len is unsigned, so all length checks use unsigned conditions (jb/jae).
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN		# key length
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE		# chain: previous ciphertext (or iv) ^ plaintext
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop	# at least one more block
	movups STATE, (IVP)	# last ciphertext block is the next iv
.Lcbc_enc_ret:
	ret
 | |
| 
 | |
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypt every complete 16-byte block of src into dst. Blocks are
 * processed four at a time while at least 64 bytes remain, then one at a
 * time. The chaining value is read from iv on entry, and the last
 * ciphertext block processed is written back to iv on exit, whichever loop
 * handled it. When len is below 16 nothing is done and iv is not touched.
 * A trailing partial block (len % 16 bytes) is ignored.
 *
 * Each ciphertext block is copied to an IN register before it is
 * decrypted, and the output is stored only after the copies have been
 * used for chaining.
 *
 * len is unsigned, so all length checks use unsigned conditions (jb/jae).
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP			# decryption round keys start at ctx + 240
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1			# block n is chained with the previous
	pxor IN1, STATE2		# ciphertext block (iv for the first)
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last ciphertext block chains onwards
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# at least four more blocks
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV			# this ciphertext block chains onwards
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# at least one more block
.Lcbc_dec_ret:
	movups IV, (IVP)		# save the chaining value for the next call
.Lcbc_dec_just_ret:
	ret
 |