1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include "../arcfour.h"
  27 
  28 /* Initialize the key stream 'key' using the key value */
  29 void
  30 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
  31 {
  32 /* EXPORT DELETE START */
  33 
  34         uchar_t ext_keyval[256];
  35         uchar_t tmp;
  36         int i, j;
  37 
  38         for (i = j = 0; i < 256; i++, j++) {
  39                 if (j == keyvallen)
  40                         j = 0;
  41 
  42                 ext_keyval[i] = keyval[j];
  43         }
  44         for (i = 0; i < 256; i++)
  45                 key->arr[i] = (uchar_t)i;
  46 
  47         j = 0;
  48         for (i = 0; i < 256; i++) {
  49                 j = (j + key->arr[i] + ext_keyval[i]) % 256;
  50                 tmp = key->arr[i];
  51                 key->arr[i] = key->arr[j];
  52                 key->arr[j] = tmp;
  53         }
  54         key->i = 0;
  55         key->j = 0;
  56 
  57 /* EXPORT DELETE END */
  58 }
  59 
  60 
  61 /*
  62  * Encipher 'in' using 'key.
  63  * in and out can point to the same location
  64  */
  65 void
  66 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
  67 {
  68         size_t ii;
  69         unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
  70         uchar_t i, j, *base, jj, *base1, tmp;
  71         unsigned int tmp0, tmp1, i_accum, shift = 0, i1;
  72 
  73 
  74 /* EXPORT DELETE START */
  75         int index;
  76 
  77         base = key->arr;
  78 
  79         index = (((uintptr_t)in) & 0x7);
  80 
  81         /* Get the 'in' on an 8-byte alignment */
  82         if (index > 0) {
  83                 i = key->i;
  84                 j = key->j;
  85 
  86                 for (index = 8 - index; (index-- > 0) && len > 0;
  87                     len--, in++, out++) {
  88 
  89                         i = i + 1;
  90                         j = j + key->arr[i];
  91                         tmp = key->arr[i];
  92                         key->arr[i] = key->arr[j];
  93                         key->arr[j] = tmp;
  94                         tmp = key->arr[i] + key->arr[j];
  95                         *out = *in ^ key->arr[tmp];
  96                 }
  97                 key->i = i;
  98                 key->j = j;
  99 
 100         }
 101         if (len == 0)
 102                 return;
 103 
 104         /* See if we're fortunate and 'out' got aligned as well */
 105 
 106 
 107         /*
 108          * Niagara optimized version for
 109          * the cases where the input and output  buffers are aligned on
 110          * a multiple of 8-byte boundary.
 111          */
 112 #ifdef  sun4v
 113         if ((((uintptr_t)out) & 7) != 0) {
 114 #endif  /* sun4v */
 115                 i = key->i;
 116                 j = key->j;
 117                 for (ii = 0; ii < len; ii++) {
 118                         i = i + 1;
 119                         tmp0 = base[i];
 120                         j = j + tmp0;
 121                         tmp1 = base[j];
 122                         base[i] = (uchar_t)tmp1;
 123                         base[j] = (uchar_t)tmp0;
 124                         tmp0 += tmp1;
 125                         tmp0 = tmp0 & 0xff;
 126                         out[ii] = in[ii] ^ base[tmp0];
 127                 }
 128                 key->i = i;
 129                 key->j = j;
 130 #ifdef  sun4v
 131         } else {
 132                 i = key->i;
 133                 j = key->j;
 134 
 135                 /*
 136                  * Want to align base[i] on a 2B boundary -- allows updates
 137                  * via [i] to be performed in 2B chunks (reducing # of stores).
 138                  * Requires appropriate alias detection.
 139                  */
 140 
 141                 if (((i+1) % 2) != 0) {
 142                         i = i + 1;
 143                         tmp0 = base[i];
 144                         j = j + tmp0;
 145                         tmp1 = base[j];
 146 
 147                         base[i] = (uchar_t)tmp1;
 148                         base[j] = (uchar_t)tmp0;
 149 
 150                         tmp0 += tmp1;
 151                         tmp0 = tmp0 & 0xff;
 152 
 153                         merge0 = (unsigned long long)(base[tmp0]) << 56;
 154                         shift = 8; mask = 0xff;
 155                 }
 156 
 157                 /*
 158                  * Note - in and out may now be misaligned -
 159                  * as updating [out] in 8B chunks need to handle this
 160                  * possibility. Also could have a 1B overrun.
 161                  * Need to drop out of loop early as a result.
 162                  */
 163 
 164                 for (ii = 0, i1 = i; ii < ((len-1)  & (~7));
 165                     ii += 8, i1 = i1&0xff) {
 166 
 167                         /*
 168                          * If i < less than 248, know wont wrap around
 169                          * (i % 256), so don't need to bother with masking i
 170                          * after each increment
 171                          */
 172                         if (i1 < 248) {
 173 
 174                                 /* BYTE 0 */
 175                                 i1 = (i1 + 1);
 176 
 177                                 /*
 178                                  * Creating this base pointer reduces subsequent
 179                                  * arihmetic ops required to load [i]
 180                                  *
 181                                  * N.B. don't need to check if [j] aliases.
 182                                  * [i] and [j] end up with the same values
 183                                  * anyway.
 184                                  */
 185                                 base1 = &base[i1];
 186 
 187                                 tmp0 = base1[0];
 188                                 j = j + tmp0;
 189 
 190                                 tmp1 = base[j];
 191                                 /*
 192                                  * Don't store [i] yet
 193                                  */
 194                                 i_accum = tmp1;
 195                                 base[j] = (uchar_t)tmp0;
 196 
 197                                 tmp0 += tmp1;
 198                                 tmp0 = tmp0 & 0xff;
 199 
 200                                 /*
 201                                  * Check [tmp0] doesn't alias with [i]
 202                                  */
 203 
 204                                 /*
 205                                  * Updating [out] in 8B chunks
 206                                  */
 207                                 if (i1 == tmp0) {
 208                                         merge =
 209                                             (unsigned long long)(i_accum) << 56;
 210                                 } else {
 211                                         merge =
 212                                             (unsigned long long)(base[tmp0]) <<
 213                                             56;
 214                                 }
 215 
 216                                 /* BYTE 1 */
 217                                 tmp0 = base1[1];
 218 
 219                                 j = j + tmp0;
 220 
 221                                 /*
 222                                  * [j] can now alias with [i] and [i-1]
 223                                  * If alias abort speculation
 224                                  */
 225                                 if ((i1 ^ j) < 2) {
 226                                         base1[0] = (uchar_t)i_accum;
 227 
 228                                         tmp1 = base[j];
 229 
 230                                         base1[1] = (uchar_t)tmp1;
 231                                         base[j] = (uchar_t)tmp0;
 232 
 233                                         tmp0 += tmp1;
 234                                         tmp0 = tmp0 & 0xff;
 235 
 236                                         merge |= (unsigned long long)
 237                                             (base[tmp0]) << 48;
 238                                 } else {
 239 
 240                                         tmp1 = base[j];
 241 
 242                                         i_accum = i_accum << 8;
 243                                         i_accum |= tmp1;
 244 
 245                                         base[j] = (uchar_t)tmp0;
 246 
 247                                         tmp0 += tmp1;
 248                                         tmp0 = tmp0 & 0xff;
 249 
 250                                         /*
 251                                          * Speculation suceeded! Update [i]
 252                                          * in 2B chunk
 253                                          */
 254                                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 255                                         *((unsigned short *) &base[i1]) =
 256                                             i_accum;
 257 
 258                                         merge |=
 259                                             (unsigned long long)(base[tmp0]) <<
 260                                             48;
 261                                 }
 262 
 263 
 264                                 /*
 265                                  * Too expensive to perform [i] speculation for
 266                                  * every byte. Just need to reduce frequency
 267                                  * of stores until store buffer full stalls
 268                                  * are not the bottleneck.
 269                                  */
 270 
 271                                 /* BYTE 2 */
 272                                 tmp0 = base1[2];
 273                                 j = j + tmp0;
 274                                 tmp1 = base[j];
 275                                 base1[2] = (uchar_t)tmp1;
 276                                 base[j] = (uchar_t)tmp0;
 277                                 tmp1 += tmp0;
 278                                 tmp1 = tmp1 & 0xff;
 279                                 merge |= (unsigned long long)(base[tmp1]) << 40;
 280 
 281                                 /* BYTE 3 */
 282                                 tmp0 = base1[3];
 283                                 j = j + tmp0;
 284                                 tmp1 = base[j];
 285                                 base1[3] = (uchar_t)tmp1;
 286                                 base[j] = (uchar_t)tmp0;
 287                                 tmp0 += tmp1;
 288                                 tmp0 = tmp0 & 0xff;
 289                                 merge |= (unsigned long long)(base[tmp0]) << 32;
 290 
 291                                 /* BYTE 4 */
 292                                 tmp0 = base1[4];
 293                                 j = j + tmp0;
 294                                 tmp1 = base[j];
 295                                 base1[4] = (uchar_t)tmp1;
 296                                 base[j] = (uchar_t)tmp0;
 297                                 tmp0 += tmp1;
 298                                 tmp0 = tmp0 & 0xff;
 299                                 merge |= (unsigned long long)(base[tmp0]) << 24;
 300 
 301                                 /* BYTE 5 */
 302                                 tmp0 = base1[5];
 303                                 j = j + tmp0;
 304                                 tmp1 = base[j];
 305                                 base1[5] = (uchar_t)tmp1;
 306                                 base[j] = (uchar_t)tmp0;
 307                                 tmp0 += tmp1;
 308                                 tmp0 = tmp0 & 0xff;
 309                                 merge |= (unsigned long long)(base[tmp0]) << 16;
 310 
 311                                 /* BYTE 6 */
 312                                 i1 = (i1+6);
 313                                 tmp0 = base1[6];
 314                                 j = j + tmp0;
 315                                 tmp1 = base[j];
 316                                 i_accum = tmp1;
 317                                 base[j] = (uchar_t)tmp0;
 318 
 319                                 tmp0 += tmp1;
 320                                 tmp0 = tmp0 & 0xff;
 321 
 322                                 if (i1 == tmp0) {
 323                                         merge |=
 324                                             (unsigned long long)(i_accum) << 8;
 325                                 } else {
 326                                         merge |=
 327                                             (unsigned long long)(base[tmp0]) <<
 328                                             8;
 329                                 }
 330 
 331                                 /* BYTE 7 */
 332                                 tmp0 = base1[7];
 333 
 334                                 /*
 335                                  * Perform [i] speculation again. Indentical
 336                                  * to that performed for BYTE0 and BYTE1.
 337                                  */
 338                                 j = j + tmp0;
 339                                 if ((i1 ^ j) < 2) {
 340                                         base1[6] = (uchar_t)i_accum;
 341                                         tmp1 = base[j];
 342 
 343                                         base1[7] = (uchar_t)tmp1;
 344                                         base[j] = (uchar_t)tmp0;
 345 
 346                                         tmp0 += tmp1;
 347                                         tmp0 = tmp0 & 0xff;
 348 
 349                                         merge |=
 350                                             (unsigned long long)(base[tmp0]);
 351 
 352                                 } else {
 353                                         tmp1 = base[j];
 354 
 355                                         i_accum = i_accum << 8;
 356                                         i_accum |= tmp1;
 357 
 358                                         base[j] = (uchar_t)tmp0;
 359 
 360                                         tmp0 += tmp1;
 361                                         tmp0 = tmp0 & 0xff;
 362 
 363                                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 364                                         *((unsigned short *) &base[i1]) =
 365                                             i_accum;
 366 
 367                                         merge |=
 368                                             (unsigned long long)(base[tmp0]);
 369                                 }
 370                                 i1++;
 371                         } else {
 372                                 /*
 373                                  * i is too close to wrap-around to allow
 374                                  * masking to be disregarded
 375                                  */
 376 
 377                                 /*
 378                                  * Same old speculation for BYTE 0 and BYTE 1
 379                                  */
 380 
 381                                 /* BYTE 0 */
 382                                 i1 = (i1 + 1) & 0xff;
 383                                 jj = (uchar_t)i1;
 384 
 385                                 tmp0 = base[i1];
 386                                 j = j + tmp0;
 387 
 388                                 tmp1 = base[j];
 389                                 i_accum = tmp1;
 390                                 base[j] = (uchar_t)tmp0;
 391 
 392                                 tmp0 += tmp1;
 393                                 tmp0 = tmp0 & 0xff;
 394 
 395                                 if (i1 == tmp0) {
 396                                         merge =
 397                                             (unsigned long long)(i_accum) << 56;
 398                                 } else {
 399                                         merge =
 400                                             (unsigned long long)(base[tmp0]) <<
 401                                             56;
 402                                 }
 403 
 404                                 /* BYTE 1 */
 405                                 tmp0 = base[i1+1];
 406 
 407                                 j = j + tmp0;
 408 
 409                                 if ((jj ^ j) < 2) {
 410                                         base[jj] = (uchar_t)i_accum;
 411 
 412                                         tmp1 = base[j];
 413 
 414                                         base[i1+1] = (uchar_t)tmp1;
 415                                         base[j] = (uchar_t)tmp0;
 416 
 417                                         tmp0 += tmp1;
 418                                         tmp0 = tmp0 & 0xff;
 419 
 420                                         merge |=
 421                                             (unsigned long long)(base[tmp0]) <<
 422                                             48;
 423                                 } else {
 424 
 425                                         tmp1 = base[j];
 426 
 427                                         i_accum = i_accum << 8;
 428                                         i_accum |= tmp1;
 429 
 430                                         base[j] = (uchar_t)tmp0;
 431 
 432                                         tmp0 += tmp1;
 433                                         tmp0 = tmp0 & 0xff;
 434 
 435                                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 436                                         *((unsigned short *) &base[jj]) =
 437                                             i_accum;
 438 
 439                                         merge |=
 440                                             (unsigned long long)(base[tmp0]) <<
 441                                             48;
 442                                 }
 443 
 444                                 /* BYTE 2 */
 445                                 /*
 446                                  * As know i must be even when enter loop (to
 447                                  * satisfy alignment), can only wrap around
 448                                  * on the even bytes. So just need to perform
 449                                  * mask every 2nd byte
 450                                  */
 451                                 i1 = (i1 + 2) & 0xff;
 452                                 tmp0 = base[i1];
 453                                 j = j + tmp0;
 454                                 tmp1 = base[j];
 455                                 base[i1] = (uchar_t)tmp1;
 456                                 base[j] = (uchar_t)tmp0;
 457                                 tmp0 += tmp1;
 458                                 tmp0 = tmp0 & 0xff;
 459                                 merge |= (unsigned long long)(base[tmp0]) << 40;
 460 
 461                                 /* BYTE 3 */
 462                                 tmp0 = base[i1+1];
 463                                 j = j + tmp0;
 464                                 tmp1 = base[j];
 465                                 base[i1+1] = (uchar_t)tmp1;
 466                                 base[j] = (uchar_t)tmp0;
 467                                 tmp0 += tmp1;
 468                                 tmp0 = tmp0 & 0xff;
 469                                 merge |= (unsigned long long)(base[tmp0]) << 32;
 470 
 471                                 /* BYTE 4 */
 472                                 i1 = (i1 + 2) & 0xff;
 473                                 tmp0 = base[i1];
 474                                 j = j + tmp0;
 475                                 tmp1 = base[j];
 476                                 base[i1] = (uchar_t)tmp1;
 477                                 base[j] = (uchar_t)tmp0;
 478                                 tmp0 += tmp1;
 479                                 tmp0 = tmp0 & 0xff;
 480                                 merge |= (unsigned long long)(base[tmp0]) << 24;
 481 
 482                                 /* BYTE 5 */
 483                                 tmp0 = base[i1+1];
 484                                 j = j + tmp0;
 485                                 tmp1 = base[j];
 486                                 base[i1+1] = (uchar_t)tmp1;
 487                                 base[j] = (uchar_t)tmp0;
 488                                 tmp0 += tmp1;
 489                                 tmp0 = tmp0 & 0xff;
 490                                 merge |= (unsigned long long)(base[tmp0]) << 16;
 491 
 492                                 /* BYTE 6 */
 493                                 i1 = (i1+2) &0xff;
 494                                 jj = (uchar_t)i1;
 495                                 tmp0 = base[i1];
 496 
 497                                 j = j + tmp0;
 498 
 499                                 tmp1 = base[j];
 500                                 i_accum = tmp1;
 501                                 base[j] = (uchar_t)tmp0;
 502 
 503 
 504                                 tmp0 += tmp1;
 505                                 tmp0 = tmp0 & 0xff;
 506 
 507                                 if (i1 == tmp0) {
 508                                         merge |=
 509                                             (unsigned long long)(i_accum) << 8;
 510                                 } else {
 511                                         merge |=
 512                                             (unsigned long long)(base[tmp0]) <<
 513                                             8;
 514                                 }
 515 
 516                                 /* BYTE 7 */
 517                                 i1++;
 518                                 tmp0 = base[i1];
 519 
 520                                 j = j + tmp0;
 521                                 if ((jj ^ j) < 2) {
 522                                         base[jj] = (uchar_t)i_accum;
 523                                         tmp1 = base[j];
 524 
 525                                         base[i1] = (uchar_t)tmp1;
 526                                         base[j] = (uchar_t)tmp0;
 527 
 528                                         tmp0 += tmp1;
 529                                         tmp0 = tmp0 & 0xff;
 530 
 531                                         merge |=
 532                                             (unsigned long long)(base[tmp0]);
 533 
 534                                 } else {
 535 
 536                                         tmp1 = base[j];
 537 
 538                                         i_accum = i_accum << 8;
 539                                         i_accum |= tmp1;
 540 
 541                                         base[j] = (uchar_t)tmp0;
 542 
 543                                         tmp0 += tmp1;
 544                                         tmp0 = tmp0 & 0xff;
 545 
 546                                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 547                                         *((unsigned short *) &base[jj]) =
 548                                             i_accum;
 549 
 550                                         merge |=
 551                                             (unsigned long long)(base[tmp0]);
 552                                 }
 553                         }
 554 
 555                         /*
 556                          * Perform update to [out]
 557                          * Remember could be alignment issues
 558                          */
 559                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 560                         in0 = *((unsigned long long *) (&in[ii]));
 561 
 562                         merge1 = merge0 | (merge >> shift);
 563 
 564                         merge0 = (merge & mask) << 56;
 565 
 566                         in0 = in0 ^ merge1;
 567 
 568                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 569                         *((unsigned long long *) (&out[ii])) = in0;
 570                 }
 571 
 572                 i = (uchar_t)i1;
 573 
 574                 /*
 575                  * Handle any overrun
 576                  */
 577                 if (shift) {
 578                         out[ii] = in[ii] ^ (merge0 >> 56);
 579                         ii++;
 580                 }
 581 
 582                 /*
 583                  * Handle final few bytes
 584                  */
 585                 for (; ii < len; ii++) {
 586                         i = i + 1;
 587                         tmp0 = base[i];
 588                         j = j + tmp0;
 589                         tmp1 = base[j];
 590 
 591                         base[i] = (uchar_t)tmp1;
 592                         base[j] = (uchar_t)tmp0;
 593 
 594                         tmp0 += tmp1;
 595                         tmp0 = tmp0 & 0xff;
 596                         out[ii] = in[ii] ^ base[tmp0];
 597                 }
 598                 key->i = i;
 599                 key->j = j;
 600         }
 601 #endif /* sun4v */
 602 
 603 /* EXPORT DELETE END */
 604 }