Print this page
first pass


 139  * !__GNUC__ ifdefs.  Also removed ENCRYPTION, DECRYPTION,
 140  * AES_128, AES_192, AES_256, AES_VAR ifdefs.
 141  *
 142  * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
 143  *
 144  * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
 145  *
 146  * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
 147  * (operands reversed, literals prefixed with "$", registers prefixed with "%",
 148  * "[register+offset]" addressing changed to "offset(register)",
 149  * parentheses in constant expressions "()" changed to square brackets "[]",
 150  * "." removed from local (numeric) labels, and other changes).
 151  * Examples:
 152  * Intel/yasm/nasm Syntax       ATT/OpenSolaris Syntax
 153  * mov  rax,(4*20h)             mov     $[4*0x20],%rax
 154  * mov  rax,[ebx+20h]           mov     0x20(%ebx),%rax
 155  * lea  rax,[ebx+ecx]           lea     (%ebx,%ecx),%rax
 156  * sub  rax,[ebx+ecx*4-20h]     sub     -0x20(%ebx,%ecx,4),%rax
 157  *
 158  * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 159  * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START
 160  * and EXPORT DELETE END markers, and dummy C function definitions for lint.
 161  *
 162  * 6. Renamed functions and reordered parameters to match OpenSolaris:
 163  * Original Gladman interface:
 164  *      int aes_encrypt(const unsigned char *in,
 165  *              unsigned char *out, const aes_encrypt_ctx cx[1])/
 166  *      int aes_decrypt(const unsigned char *in,
 167  *              unsigned char *out, const aes_encrypt_ctx cx[1])/
 168  * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t,
 169  * and a union type, inf., containing inf.l, a uint32_t and
 170  * inf.b, a 4-element array of uint32_t.  Only b[0] in the array (aka "l") is
 171  * used and contains the key schedule length * 16 where key schedule length is
 172  * the number of rounds: 10, 12, or 14.
 173  *
 174  * OpenSolaris OS interface:
 175  *      void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
 176  *              const uint32_t pt[4], uint32_t ct[4])/
 177  *      void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
 178  *              const uint32_t pt[4], uint32_t ct[4])/
 179  *      typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/
 180  *               uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/


 310 #define kptr    %r8     /* key schedule pointer; base register of every fk_ref/ik_ref operand */
 311 #define fofs    128     /* adjust offset in key schedule to keep |disp| < 128 */
 312 #define fk_ref(x, y)    -16*x+fofs+4*y(kptr)    /* memory operand at kptr + fofs - 16*x + 4*y; ff_rnd passes x = round, y = 0..3 */
 313 
 314 #ifdef  AES_REV_DKS
 315 #define rofs            128     /* bias used by ik_ref; same value as fofs in this variant */
 316 #define ik_ref(x, y)    -16*x+rofs+4*y(kptr)    /* memory operand at kptr + rofs - 16*x + 4*y; presumably the decryption key schedule -- confirm against the inverse round macros */
 317 
 318 #else
 319 #define rofs            -128    /* bias used by ik_ref; negative here, where 16*x is added instead of subtracted */
 320 #define ik_ref(x, y)    16*x+rofs+4*y(kptr)     /* memory operand at kptr + rofs + 16*x + 4*y; presumably the decryption key schedule -- confirm against the inverse round macros */
 321 #endif  /* AES_REV_DKS */
 322 
 323 #define tab_0(x)        (tptr,x,8)      /* operand at tptr + 8*x: index x is scaled by 8, no displacement */
 324 #define tab_1(x)        3(tptr,x,8)     /* same index and scale, displacement 3 */
 325 #define tab_2(x)        2(tptr,x,8)     /* same index and scale, displacement 2 */
 326 #define tab_3(x)        1(tptr,x,8)     /* same index and scale, displacement 1 */
 327 #define tab_f(x)        1(tptr,x,8)     /* expands to the same operand as tab_3; presumably the forward last-round lookup -- confirm */
 328 #define tab_i(x)        7(tptr,x,8)     /* displacement 7; loaded with movzx by a round macro later in this file */
 329 
 330         /* EXPORT DELETE START */
 331 #define ff_rnd(p1, p2, p3, p4, round)   /* normal forward round */ \
 332         mov     fk_ref(round,0), p1; \
 333         mov     fk_ref(round,1), p2; \
 334         mov     fk_ref(round,2), p3; \
 335         mov     fk_ref(round,3), p4; \
 336  \
 337         movzx   %al, %esi; \
 338         movzx   %ah, %edi; \
 339         shr     $16, %eax; \
 340         xor     tab_0(%rsi), p1; \
 341         xor     tab_1(%rdi), p4; \
 342         movzx   %al, %esi; \
 343         movzx   %ah, %edi; \
 344         xor     tab_2(%rsi), p3; \
 345         xor     tab_3(%rdi), p2; \
 346  \
 347         movzx   %bl, %esi; \
 348         movzx   %bh, %edi; \
 349         shr     $16, %ebx; \
 350         xor     tab_0(%rsi), p2; \


 666         xor     %edi, p2; \
 667  \
 668         movzx   %dl, %esi; \
 669         movzx   %dh, %edi; \
 670         movzx   tab_i(%rsi), %esi; \
 671         movzx   tab_i(%rdi), %edi; \
 672         shr     $16, %edx; \
 673         xor     %esi, p4; \
 674         rol     $8, %edi; \
 675         xor     %edi, p1; \
 676         movzx   %dl, %esi; \
 677         movzx   %dh, %edi; \
 678         movzx   tab_i(%rsi), %esi; \
 679         movzx   tab_i(%rdi), %edi; \
 680         rol     $16, %esi; \
 681         rol     $24, %edi; \
 682         xor     %esi, p2; \
 683         xor     %edi, p3
 684 
 685 #endif  /* LAST_ROUND_TABLES */
 686         /* EXPORT DELETE END */
 687 
 688 /*
 689  * OpenSolaris OS:
 690  * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
 691  *      const uint32_t pt[4], uint32_t ct[4])/
 692  *
 693  * Original interface:
 694  * int aes_encrypt(const unsigned char *in,
 695  *      unsigned char *out, const aes_encrypt_ctx cx[1])/
 696  */
 697         .align  64      /* the table below starts on a 64-byte boundary */
 698 enc_tab:                /* data emitted by the enc_vals macro; presumably the lookup table for aes_encrypt_amd64 -- confirm */
 699         enc_vals(u8)
 700 #ifdef  LAST_ROUND_TABLES
 701         / Last Round Tables:
 702         enc_vals(w8)    /* second expansion, with w8, placed directly after the first */
 703 #endif
 704 
 705 
 706         ENTRY_NP(aes_encrypt_amd64)
 707         /* EXPORT DELETE START */
 708 #ifdef  GLADMAN_INTERFACE
 709         / Original interface
 710         sub     $[4*8], %rsp    / gnu/linux/opensolaris binary interface
 711         mov     %rsi, (%rsp)    / output pointer (P2)
 712         mov     %rdx, %r8       / context (P3)
 713 
 714         mov     %rbx, 1*8(%rsp) / P1: input pointer in rdi
 715         mov     %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
 716         mov     %r12, 3*8(%rsp) / P3: context in r8
 717         movzx   4*KS_LENGTH(kptr), %esi / Get byte key length * 16
 718 
 719 #else
 720         / OpenSolaris OS interface
 721         sub     $[4*8], %rsp    / Make room on stack to save registers
 722         mov     %rcx, (%rsp)    / Save output pointer (P4) on stack
 723         mov     %rdi, %r8       / context (P1)
 724         mov     %rdx, %rdi      / P3: save input pointer
 725         shl     $4, %esi        / P2: esi byte key length * 16
 726 
 727         mov     %rbx, 1*8(%rsp) / Save registers


 769         ff_rnd(%r9d, %r10d, %r11d, %r12d,  6)
 770         ff_rnd(%r9d, %r10d, %r11d, %r12d,  5)
 771         ff_rnd(%r9d, %r10d, %r11d, %r12d,  4)
 772         ff_rnd(%r9d, %r10d, %r11d, %r12d,  3)
 773         ff_rnd(%r9d, %r10d, %r11d, %r12d,  2)
 774         ff_rnd(%r9d, %r10d, %r11d, %r12d,  1)
 775         fl_rnd(%r9d, %r10d, %r11d, %r12d,  0)
 776 
 777         / Copy results
 778         mov     (%rsp), %rbx
 779         mov     %r9d, (%rbx)
 780         mov     %r10d, 4(%rbx)
 781         mov     %r11d, 8(%rbx)
 782         mov     %r12d, 12(%rbx)
 783         xor     %rax, %rax
 784 4:      / Restore registers
 785         mov     1*8(%rsp), %rbx
 786         mov     2*8(%rsp), %rbp
 787         mov     3*8(%rsp), %r12
 788         add     $[4*8], %rsp
 789         /* EXPORT DELETE END */
 790         ret
 791 
 792         SET_SIZE(aes_encrypt_amd64)
 793 
 794 /*
 795  * OpenSolaris OS:
 796  * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
 797  *      const uint32_t pt[4], uint32_t ct[4])/
 798  *
 799  * Original interface:
 800  * int aes_decrypt(const unsigned char *in,
 801  *      unsigned char *out, const aes_encrypt_ctx cx[1])/
 802  */
 803         .align  64      /* the table below starts on a 64-byte boundary */
 804 dec_tab:                /* data emitted by the dec_vals macro; presumably the lookup table for aes_decrypt_amd64 -- confirm */
 805         dec_vals(v8)
 806 #ifdef  LAST_ROUND_TABLES
 807         / Last Round Tables:
 808         dec_vals(w8)    /* second expansion, with w8, placed directly after the first */
 809 #endif
 810 
 811 
 812         ENTRY_NP(aes_decrypt_amd64)
 813         /* EXPORT DELETE START */
 814 #ifdef  GLADMAN_INTERFACE
 815         / Original interface
 816         sub     $[4*8], %rsp    / gnu/linux/opensolaris binary interface
 817         mov     %rsi, (%rsp)    / output pointer (P2)
 818         mov     %rdx, %r8       / context (P3)
 819 
 820         mov     %rbx, 1*8(%rsp) / P1: input pointer in rdi
 821         mov     %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
 822         mov     %r12, 3*8(%rsp) / P3: context in r8
 823         movzx   4*KS_LENGTH(kptr), %esi / Get byte key length * 16
 824 
 825 #else
 826         / OpenSolaris OS interface
 827         sub     $[4*8], %rsp    / Make room on stack to save registers
 828         mov     %rcx, (%rsp)    / Save output pointer (P4) on stack
 829         mov     %rdi, %r8       / context (P1)
 830         mov     %rdx, %rdi      / P3: save input pointer
 831         shl     $4, %esi        / P2: esi byte key length * 16
 832 
 833         mov     %rbx, 1*8(%rsp) / Save registers


 881         ii_rnd(%r9d, %r10d, %r11d, %r12d,  6)
 882         ii_rnd(%r9d, %r10d, %r11d, %r12d,  5)
 883         ii_rnd(%r9d, %r10d, %r11d, %r12d,  4)
 884         ii_rnd(%r9d, %r10d, %r11d, %r12d,  3)
 885         ii_rnd(%r9d, %r10d, %r11d, %r12d,  2)
 886         ii_rnd(%r9d, %r10d, %r11d, %r12d,  1)
 887         il_rnd(%r9d, %r10d, %r11d, %r12d,  0)
 888 
 889         / Copy results
 890         mov     (%rsp), %rbx
 891         mov     %r9d, (%rbx)
 892         mov     %r10d, 4(%rbx)
 893         mov     %r11d, 8(%rbx)
 894         mov     %r12d, 12(%rbx)
 895         xor     %rax, %rax
 896 4:      / Restore registers
 897         mov     1*8(%rsp), %rbx
 898         mov     2*8(%rsp), %rbp
 899         mov     3*8(%rsp), %r12
 900         add     $[4*8], %rsp
 901         /* EXPORT DELETE END */
 902         ret
 903 
 904         SET_SIZE(aes_decrypt_amd64)
 905 #endif  /* lint || __lint */


 139  * !__GNUC__ ifdefs.  Also removed ENCRYPTION, DECRYPTION,
 140  * AES_128, AES_192, AES_256, AES_VAR ifdefs.
 141  *
 142  * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
 143  *
 144  * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
 145  *
 146  * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
 147  * (operands reversed, literals prefixed with "$", registers prefixed with "%",
 148  * "[register+offset]" addressing changed to "offset(register)",
 149  * parentheses in constant expressions "()" changed to square brackets "[]",
 150  * "." removed from local (numeric) labels, and other changes).
 151  * Examples:
 152  * Intel/yasm/nasm Syntax       ATT/OpenSolaris Syntax
 153  * mov  rax,(4*20h)             mov     $[4*0x20],%rax
 154  * mov  rax,[ebx+20h]           mov     0x20(%ebx),%rax
 155  * lea  rax,[ebx+ecx]           lea     (%ebx,%ecx),%rax
 156  * sub  rax,[ebx+ecx*4-20h]     sub     -0x20(%ebx,%ecx,4),%rax
 157  *
 158  * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 159  * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
 160  * definitions for lint.
 161  *
 162  * 6. Renamed functions and reordered parameters to match OpenSolaris:
 163  * Original Gladman interface:
 164  *      int aes_encrypt(const unsigned char *in,
 165  *              unsigned char *out, const aes_encrypt_ctx cx[1])/
 166  *      int aes_decrypt(const unsigned char *in,
 167  *              unsigned char *out, const aes_encrypt_ctx cx[1])/
 168  * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t,
 169  * and a union type, inf., containing inf.l, a uint32_t and
 170  * inf.b, a 4-element array of uint32_t.  Only b[0] in the array (aka "l") is
 171  * used and contains the key schedule length * 16 where key schedule length is
 172  * the number of rounds: 10, 12, or 14.
 173  *
 174  * OpenSolaris OS interface:
 175  *      void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
 176  *              const uint32_t pt[4], uint32_t ct[4])/
 177  *      void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
 178  *              const uint32_t pt[4], uint32_t ct[4])/
 179  *      typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/
 180  *               uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/


 310 #define kptr    %r8     /* key schedule pointer; base register of every fk_ref/ik_ref operand */
 311 #define fofs    128     /* adjust offset in key schedule to keep |disp| < 128 */
 312 #define fk_ref(x, y)    -16*x+fofs+4*y(kptr)    /* memory operand at kptr + fofs - 16*x + 4*y; ff_rnd passes x = round, y = 0..3 */
 313 
 314 #ifdef  AES_REV_DKS
 315 #define rofs            128     /* bias used by ik_ref; same value as fofs in this variant */
 316 #define ik_ref(x, y)    -16*x+rofs+4*y(kptr)    /* memory operand at kptr + rofs - 16*x + 4*y; presumably the decryption key schedule -- confirm against the inverse round macros */
 317 
 318 #else
 319 #define rofs            -128    /* bias used by ik_ref; negative here, where 16*x is added instead of subtracted */
 320 #define ik_ref(x, y)    16*x+rofs+4*y(kptr)     /* memory operand at kptr + rofs + 16*x + 4*y; presumably the decryption key schedule -- confirm against the inverse round macros */
 321 #endif  /* AES_REV_DKS */
 322 
 323 #define tab_0(x)        (tptr,x,8)      /* operand at tptr + 8*x: index x is scaled by 8, no displacement */
 324 #define tab_1(x)        3(tptr,x,8)     /* same index and scale, displacement 3 */
 325 #define tab_2(x)        2(tptr,x,8)     /* same index and scale, displacement 2 */
 326 #define tab_3(x)        1(tptr,x,8)     /* same index and scale, displacement 1 */
 327 #define tab_f(x)        1(tptr,x,8)     /* expands to the same operand as tab_3; presumably the forward last-round lookup -- confirm */
 328 #define tab_i(x)        7(tptr,x,8)     /* displacement 7; loaded with movzx by a round macro later in this file */
 329 

 330 #define ff_rnd(p1, p2, p3, p4, round)   /* normal forward round */ \
 331         mov     fk_ref(round,0), p1; \
 332         mov     fk_ref(round,1), p2; \
 333         mov     fk_ref(round,2), p3; \
 334         mov     fk_ref(round,3), p4; \
 335  \
 336         movzx   %al, %esi; \
 337         movzx   %ah, %edi; \
 338         shr     $16, %eax; \
 339         xor     tab_0(%rsi), p1; \
 340         xor     tab_1(%rdi), p4; \
 341         movzx   %al, %esi; \
 342         movzx   %ah, %edi; \
 343         xor     tab_2(%rsi), p3; \
 344         xor     tab_3(%rdi), p2; \
 345  \
 346         movzx   %bl, %esi; \
 347         movzx   %bh, %edi; \
 348         shr     $16, %ebx; \
 349         xor     tab_0(%rsi), p2; \


 665         xor     %edi, p2; \
 666  \
 667         movzx   %dl, %esi; \
 668         movzx   %dh, %edi; \
 669         movzx   tab_i(%rsi), %esi; \
 670         movzx   tab_i(%rdi), %edi; \
 671         shr     $16, %edx; \
 672         xor     %esi, p4; \
 673         rol     $8, %edi; \
 674         xor     %edi, p1; \
 675         movzx   %dl, %esi; \
 676         movzx   %dh, %edi; \
 677         movzx   tab_i(%rsi), %esi; \
 678         movzx   tab_i(%rdi), %edi; \
 679         rol     $16, %esi; \
 680         rol     $24, %edi; \
 681         xor     %esi, p2; \
 682         xor     %edi, p3
 683 
 684 #endif  /* LAST_ROUND_TABLES */

 685 
 686 /*
 687  * OpenSolaris OS:
 688  * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
 689  *      const uint32_t pt[4], uint32_t ct[4])/
 690  *
 691  * Original interface:
 692  * int aes_encrypt(const unsigned char *in,
 693  *      unsigned char *out, const aes_encrypt_ctx cx[1])/
 694  */
 695         .align  64      /* the table below starts on a 64-byte boundary */
 696 enc_tab:                /* data emitted by the enc_vals macro; presumably the lookup table for aes_encrypt_amd64 -- confirm */
 697         enc_vals(u8)
 698 #ifdef  LAST_ROUND_TABLES
 699         / Last Round Tables:
 700         enc_vals(w8)    /* second expansion, with w8, placed directly after the first */
 701 #endif
 702 
 703 
 704         ENTRY_NP(aes_encrypt_amd64)

 705 #ifdef  GLADMAN_INTERFACE
 706         / Original interface
 707         sub     $[4*8], %rsp    / gnu/linux/opensolaris binary interface
 708         mov     %rsi, (%rsp)    / output pointer (P2)
 709         mov     %rdx, %r8       / context (P3)
 710 
 711         mov     %rbx, 1*8(%rsp) / P1: input pointer in rdi
 712         mov     %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
 713         mov     %r12, 3*8(%rsp) / P3: context in r8
 714         movzx   4*KS_LENGTH(kptr), %esi / Get byte key length * 16
 715 
 716 #else
 717         / OpenSolaris OS interface
 718         sub     $[4*8], %rsp    / Make room on stack to save registers
 719         mov     %rcx, (%rsp)    / Save output pointer (P4) on stack
 720         mov     %rdi, %r8       / context (P1)
 721         mov     %rdx, %rdi      / P3: save input pointer
 722         shl     $4, %esi        / P2: esi byte key length * 16
 723 
 724         mov     %rbx, 1*8(%rsp) / Save registers


 766         ff_rnd(%r9d, %r10d, %r11d, %r12d,  6)
 767         ff_rnd(%r9d, %r10d, %r11d, %r12d,  5)
 768         ff_rnd(%r9d, %r10d, %r11d, %r12d,  4)
 769         ff_rnd(%r9d, %r10d, %r11d, %r12d,  3)
 770         ff_rnd(%r9d, %r10d, %r11d, %r12d,  2)
 771         ff_rnd(%r9d, %r10d, %r11d, %r12d,  1)
 772         fl_rnd(%r9d, %r10d, %r11d, %r12d,  0)
 773 
 774         / Copy results
 775         mov     (%rsp), %rbx
 776         mov     %r9d, (%rbx)
 777         mov     %r10d, 4(%rbx)
 778         mov     %r11d, 8(%rbx)
 779         mov     %r12d, 12(%rbx)
 780         xor     %rax, %rax
 781 4:      / Restore registers
 782         mov     1*8(%rsp), %rbx
 783         mov     2*8(%rsp), %rbp
 784         mov     3*8(%rsp), %r12
 785         add     $[4*8], %rsp

 786         ret
 787 
 788         SET_SIZE(aes_encrypt_amd64)
 789 
 790 /*
 791  * OpenSolaris OS:
 792  * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
 793  *      const uint32_t pt[4], uint32_t ct[4])/
 794  *
 795  * Original interface:
 796  * int aes_decrypt(const unsigned char *in,
 797  *      unsigned char *out, const aes_encrypt_ctx cx[1])/
 798  */
 799         .align  64      /* the table below starts on a 64-byte boundary */
 800 dec_tab:                /* data emitted by the dec_vals macro; presumably the lookup table for aes_decrypt_amd64 -- confirm */
 801         dec_vals(v8)
 802 #ifdef  LAST_ROUND_TABLES
 803         / Last Round Tables:
 804         dec_vals(w8)    /* second expansion, with w8, placed directly after the first */
 805 #endif
 806 
 807 
 808         ENTRY_NP(aes_decrypt_amd64)

 809 #ifdef  GLADMAN_INTERFACE
 810         / Original interface
 811         sub     $[4*8], %rsp    / gnu/linux/opensolaris binary interface
 812         mov     %rsi, (%rsp)    / output pointer (P2)
 813         mov     %rdx, %r8       / context (P3)
 814 
 815         mov     %rbx, 1*8(%rsp) / P1: input pointer in rdi
 816         mov     %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
 817         mov     %r12, 3*8(%rsp) / P3: context in r8
 818         movzx   4*KS_LENGTH(kptr), %esi / Get byte key length * 16
 819 
 820 #else
 821         / OpenSolaris OS interface
 822         sub     $[4*8], %rsp    / Make room on stack to save registers
 823         mov     %rcx, (%rsp)    / Save output pointer (P4) on stack
 824         mov     %rdi, %r8       / context (P1)
 825         mov     %rdx, %rdi      / P3: save input pointer
 826         shl     $4, %esi        / P2: esi byte key length * 16
 827 
 828         mov     %rbx, 1*8(%rsp) / Save registers


 876         ii_rnd(%r9d, %r10d, %r11d, %r12d,  6)
 877         ii_rnd(%r9d, %r10d, %r11d, %r12d,  5)
 878         ii_rnd(%r9d, %r10d, %r11d, %r12d,  4)
 879         ii_rnd(%r9d, %r10d, %r11d, %r12d,  3)
 880         ii_rnd(%r9d, %r10d, %r11d, %r12d,  2)
 881         ii_rnd(%r9d, %r10d, %r11d, %r12d,  1)
 882         il_rnd(%r9d, %r10d, %r11d, %r12d,  0)
 883 
 884         / Copy results
 885         mov     (%rsp), %rbx
 886         mov     %r9d, (%rbx)
 887         mov     %r10d, 4(%rbx)
 888         mov     %r11d, 8(%rbx)
 889         mov     %r12d, 12(%rbx)
 890         xor     %rax, %rax
 891 4:      / Restore registers
 892         mov     1*8(%rsp), %rbx
 893         mov     2*8(%rsp), %rbp
 894         mov     3*8(%rsp), %r12
 895         add     $[4*8], %rsp

 896         ret
 897 
 898         SET_SIZE(aes_decrypt_amd64)
 899 #endif  /* lint || __lint */