1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #define ARCFOUR_LOOP_OPTIMIZED
  27 
  28 #ifndef _KERNEL
  29 #include <stdint.h>
  30 #endif  /* _KERNEL */
  31 
  32 #include "arcfour.h"
  33 
  34 #if defined(__amd64)
  35 /* ARCFour_key.flag values */
  36 #define ARCFOUR_ON_INTEL        1
  37 #define ARCFOUR_ON_AMD64        0
  38 
  39 #ifdef _KERNEL
  40 #include <sys/x86_archext.h>
  41 #include <sys/cpuvar.h>
  42 
  43 #else
  44 #include <sys/auxv.h>
  45 #endif  /* _KERNEL */
  46 #endif  /* __amd64 */
  47 
  48 #ifndef __amd64
  49 /*
  50  * Initialize the key stream 'key' using the key value.
  51  *
  52  * Input:
  53  * keyval       User-provided key
  54  * keyvallen    Length, in bytes, of keyval
  55  * Output:
  56  * key          Initialized ARCFOUR key schedule, based on keyval
  57  */
  58 void
  59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
  60 {
  61 /* EXPORT DELETE START */
  62 
  63         uchar_t ext_keyval[256];
  64         uchar_t tmp;
  65         int i, j;
  66 
  67         /* Normalize key length to 256 */
  68         for (i = j = 0; i < 256; i++, j++) {
  69                 if (j == keyvallen)
  70                         j = 0;
  71                 ext_keyval[i] = keyval[j];
  72         }
  73 
  74         for (i = 0; i < 256; i++)
  75                 key->arr[i] = (uchar_t)i;
  76 
  77         j = 0;
  78         for (i = 0; i < 256; i++) {
  79                 j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
  80                 tmp = key->arr[i];
  81                 key->arr[i] = key->arr[j];
  82                 key->arr[j] = tmp;
  83         }
  84         key->i = 0;
  85         key->j = 0;
  86 
  87 /* EXPORT DELETE END */
  88 }
  89 #endif  /* !__amd64 */
  90 
  91 
  92 /*
  93  * Encipher 'in' using 'key'.
  94  *
  95  * Input:
  96  * key          ARCFOUR key, initialized by arcfour_key_init()
  97  * in           Input text
  98  * out          Buffer to contain output text
  99  * len          Length, in bytes, of the in and out buffers
 100  *
 101  * Output:
 102  * out          Buffer containing output text
 103  *
 104  * Note: in and out can point to the same location
 105  */
 106 void
 107 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
 108 {
 109 /* EXPORT DELETE START */
 110 #ifdef  __amd64
 111         if (key->flag == ARCFOUR_ON_AMD64) {
 112                 arcfour_crypt_asm(key, in, out, len);
 113         } else { /* Intel EM64T */
 114 #endif  /* amd64 */
 115 
 116         size_t          ii;
 117         uchar_t         i, j, ti, tj;
 118 #ifdef ARCFOUR_LOOP_OPTIMIZED
 119         uchar_t         arr_ij;
 120 #endif
 121 #ifdef __amd64
 122         uint32_t        *arr;
 123 #else
 124         uchar_t         *arr;
 125 #endif
 126 
 127 #ifdef  sun4u
 128         /*
 129          * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
 130          * the cases where the input and output buffers are aligned on
 131          * a multiple of 8-byte boundary.
 132          */
 133         int             index;
 134         uchar_t         tmp;
 135 
 136         index = (((uint64_t)(uintptr_t)in) & 0x7);
 137 
 138         /* Get the 'in' on an 8-byte alignment */
 139         if (index > 0) {
 140                 i = key->i;
 141                 j = key->j;
 142                 for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
 143                     (index-- > 0) && len > 0;
 144                     len--, in++, out++) {
 145                         ++i;
 146                         j = j + key->arr[i];
 147                         tmp = key->arr[i];
 148                         key->arr[i] = key->arr[j];
 149                         key->arr[j] = tmp;
 150                         tmp = key->arr[i] + key->arr[j];
 151                         *out = *in ^ key->arr[tmp];
 152                 }
 153                 key->i = i;
 154                 key->j = j;
 155         }
 156 
 157         if (len == 0)
 158                 return;
 159 
 160         /* See if we're fortunate and 'out' got aligned as well */
 161 
 162         if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
 163 #endif  /* sun4u */
 164 
 165         i = key->i;
 166         j = key->j;
 167         arr = key->arr;
 168 
 169 #ifndef ARCFOUR_LOOP_OPTIMIZED
 170         /*
 171          * This loop is hasn't been reordered, but is kept for reference
 172          * purposes as it's more readable
 173          */
 174         for (ii = 0; ii < len; ++ii) {
 175                 ++i;
 176                 ti = arr[i];
 177                 j = j + ti;
 178                 tj = arr[j];
 179                 arr[j] = ti;
 180                 arr[i] = tj;
 181                 out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
 182         }
 183 
 184 #else
 185         /*
 186          * This for loop is optimized by carefully spreading out
 187          * memory access and storage to avoid conflicts,
 188          * allowing the processor to process operations in parallel
 189          */
 190 
 191         /* for loop setup */
 192         ++i;
 193         ti = arr[i];
 194         j = j + ti;
 195         tj = arr[j];
 196         arr[j] = ti;
 197         arr[i] = tj;
 198         arr_ij = arr[(ti + tj) & 0xff];
 199         --len;
 200 
 201         for (ii = 0; ii < len; ) {
 202                 ++i;
 203                 ti = arr[i];
 204                 j = j + ti;
 205                 tj = arr[j];
 206                 arr[j] = ti;
 207                 arr[i] = tj;
 208 
 209                 /* save result from previous loop: */
 210                 out[ii] = in[ii] ^ arr_ij;
 211 
 212                 ++ii;
 213                 arr_ij = arr[(ti + tj) & 0xff];
 214         }
 215         /* save result from last loop: */
 216         out[ii] = in[ii] ^ arr_ij;
 217 #endif
 218 
 219         key->i = i;
 220         key->j = j;
 221 
 222 #ifdef  sun4u
 223         } else {
 224                 arcfour_crypt_aligned(key, len, in, out);
 225         }
 226 #endif  /* sun4u */
 227 #ifdef  __amd64
 228         }
 229 #endif  /* amd64 */
 230 
 231 /* EXPORT DELETE END */
 232 }
 233 
 234 
 235 #ifdef  __amd64
 236 /*
 237  * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
 238  * Cache the result, as the CPU can't change.
 239  *
 240  * Note: the userland version uses getisax() and checks for an AMD-64-only
 241  * feature.  The kernel version uses cpuid_getvendor().
 242  */
 243 int
 244 arcfour_crypt_on_intel(void)
 245 {
 246         static int      cached_result = -1;
 247 
 248         if (cached_result == -1) { /* first time */
 249 #ifdef _KERNEL
 250                 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
 251 #else
 252                 uint_t  ui;
 253 
 254                 (void) getisax(&ui, 1);
 255                 cached_result = ((ui & AV_386_AMD_MMX) == 0);
 256 #endif  /* _KERNEL */
 257         }
 258 
 259         return (cached_result);
 260 }
 261 #endif  /* __amd64 */