Print this page
first pass
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/crypto/arcfour/sun4v/arcfour_crypt.c
+++ new/usr/src/common/crypto/arcfour/sun4v/arcfour_crypt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
22 22 /*
23 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 #include "../arcfour.h"
27 27
28 28 /* Initialize the key stream 'key' using the key value */
29 29 void
30 30 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
31 31 {
32 -/* EXPORT DELETE START */
33 -
34 32 uchar_t ext_keyval[256];
35 33 uchar_t tmp;
36 34 int i, j;
37 35
38 36 for (i = j = 0; i < 256; i++, j++) {
39 37 if (j == keyvallen)
40 38 j = 0;
41 39
42 40 ext_keyval[i] = keyval[j];
43 41 }
44 42 for (i = 0; i < 256; i++)
45 43 key->arr[i] = (uchar_t)i;
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
46 44
47 45 j = 0;
48 46 for (i = 0; i < 256; i++) {
49 47 j = (j + key->arr[i] + ext_keyval[i]) % 256;
50 48 tmp = key->arr[i];
51 49 key->arr[i] = key->arr[j];
52 50 key->arr[j] = tmp;
53 51 }
54 52 key->i = 0;
55 53 key->j = 0;
56 -
57 -/* EXPORT DELETE END */
58 54 }
59 55
60 56
61 57 /*
 62 58  * Encipher 'in' using 'key'.
63 59 * in and out can point to the same location
64 60 */
65 61 void
66 62 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
67 63 {
68 64 size_t ii;
69 65 unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
70 66 uchar_t i, j, *base, jj, *base1, tmp;
71 67 unsigned int tmp0, tmp1, i_accum, shift = 0, i1;
72 68
73 -
74 -/* EXPORT DELETE START */
75 69 int index;
76 70
77 71 base = key->arr;
78 72
79 73 index = (((uintptr_t)in) & 0x7);
80 74
81 75 /* Get the 'in' on an 8-byte alignment */
82 76 if (index > 0) {
83 77 i = key->i;
84 78 j = key->j;
85 79
86 80 for (index = 8 - index; (index-- > 0) && len > 0;
87 81 len--, in++, out++) {
88 82
89 83 i = i + 1;
90 84 j = j + key->arr[i];
91 85 tmp = key->arr[i];
92 86 key->arr[i] = key->arr[j];
93 87 key->arr[j] = tmp;
94 88 tmp = key->arr[i] + key->arr[j];
95 89 *out = *in ^ key->arr[tmp];
96 90 }
97 91 key->i = i;
98 92 key->j = j;
99 93
100 94 }
101 95 if (len == 0)
102 96 return;
103 97
104 98 /* See if we're fortunate and 'out' got aligned as well */
105 99
106 100
107 101 /*
108 102 * Niagara optimized version for
109 103 * the cases where the input and output buffers are aligned on
110 104 * a multiple of 8-byte boundary.
111 105 */
112 106 #ifdef sun4v
113 107 if ((((uintptr_t)out) & 7) != 0) {
114 108 #endif /* sun4v */
115 109 i = key->i;
116 110 j = key->j;
117 111 for (ii = 0; ii < len; ii++) {
118 112 i = i + 1;
119 113 tmp0 = base[i];
120 114 j = j + tmp0;
121 115 tmp1 = base[j];
122 116 base[i] = (uchar_t)tmp1;
123 117 base[j] = (uchar_t)tmp0;
124 118 tmp0 += tmp1;
125 119 tmp0 = tmp0 & 0xff;
126 120 out[ii] = in[ii] ^ base[tmp0];
127 121 }
128 122 key->i = i;
129 123 key->j = j;
130 124 #ifdef sun4v
131 125 } else {
132 126 i = key->i;
133 127 j = key->j;
134 128
135 129 /*
136 130 * Want to align base[i] on a 2B boundary -- allows updates
137 131 * via [i] to be performed in 2B chunks (reducing # of stores).
138 132 * Requires appropriate alias detection.
139 133 */
140 134
141 135 if (((i+1) % 2) != 0) {
142 136 i = i + 1;
143 137 tmp0 = base[i];
144 138 j = j + tmp0;
145 139 tmp1 = base[j];
146 140
147 141 base[i] = (uchar_t)tmp1;
148 142 base[j] = (uchar_t)tmp0;
149 143
150 144 tmp0 += tmp1;
151 145 tmp0 = tmp0 & 0xff;
152 146
153 147 merge0 = (unsigned long long)(base[tmp0]) << 56;
154 148 shift = 8; mask = 0xff;
155 149 }
156 150
157 151 /*
158 152 * Note - in and out may now be misaligned -
159 153 * as updating [out] in 8B chunks need to handle this
160 154 * possibility. Also could have a 1B overrun.
161 155 * Need to drop out of loop early as a result.
162 156 */
163 157
164 158 for (ii = 0, i1 = i; ii < ((len-1) & (~7));
165 159 ii += 8, i1 = i1&0xff) {
166 160
167 161 /*
 168 162 * If i1 is less than 248, we know it won't wrap around
169 163 * (i % 256), so don't need to bother with masking i
170 164 * after each increment
171 165 */
172 166 if (i1 < 248) {
173 167
174 168 /* BYTE 0 */
175 169 i1 = (i1 + 1);
176 170
177 171 /*
178 172 * Creating this base pointer reduces subsequent
 179 173 * arithmetic ops required to load [i]
180 174 *
181 175 * N.B. don't need to check if [j] aliases.
182 176 * [i] and [j] end up with the same values
183 177 * anyway.
184 178 */
185 179 base1 = &base[i1];
186 180
187 181 tmp0 = base1[0];
188 182 j = j + tmp0;
189 183
190 184 tmp1 = base[j];
191 185 /*
192 186 * Don't store [i] yet
193 187 */
194 188 i_accum = tmp1;
195 189 base[j] = (uchar_t)tmp0;
196 190
197 191 tmp0 += tmp1;
198 192 tmp0 = tmp0 & 0xff;
199 193
200 194 /*
201 195 * Check [tmp0] doesn't alias with [i]
202 196 */
203 197
204 198 /*
205 199 * Updating [out] in 8B chunks
206 200 */
207 201 if (i1 == tmp0) {
208 202 merge =
209 203 (unsigned long long)(i_accum) << 56;
210 204 } else {
211 205 merge =
212 206 (unsigned long long)(base[tmp0]) <<
213 207 56;
214 208 }
215 209
216 210 /* BYTE 1 */
217 211 tmp0 = base1[1];
218 212
219 213 j = j + tmp0;
220 214
221 215 /*
222 216 * [j] can now alias with [i] and [i-1]
223 217 * If alias abort speculation
224 218 */
225 219 if ((i1 ^ j) < 2) {
226 220 base1[0] = (uchar_t)i_accum;
227 221
228 222 tmp1 = base[j];
229 223
230 224 base1[1] = (uchar_t)tmp1;
231 225 base[j] = (uchar_t)tmp0;
232 226
233 227 tmp0 += tmp1;
234 228 tmp0 = tmp0 & 0xff;
235 229
236 230 merge |= (unsigned long long)
237 231 (base[tmp0]) << 48;
238 232 } else {
239 233
240 234 tmp1 = base[j];
241 235
242 236 i_accum = i_accum << 8;
243 237 i_accum |= tmp1;
244 238
245 239 base[j] = (uchar_t)tmp0;
246 240
247 241 tmp0 += tmp1;
248 242 tmp0 = tmp0 & 0xff;
249 243
250 244 /*
 251 245 * Speculation succeeded! Update [i]
252 246 * in 2B chunk
253 247 */
254 248 /* LINTED E_BAD_PTR_CAST_ALIGN */
255 249 *((unsigned short *) &base[i1]) =
256 250 i_accum;
257 251
258 252 merge |=
259 253 (unsigned long long)(base[tmp0]) <<
260 254 48;
261 255 }
262 256
263 257
264 258 /*
265 259 * Too expensive to perform [i] speculation for
266 260 * every byte. Just need to reduce frequency
267 261 * of stores until store buffer full stalls
268 262 * are not the bottleneck.
269 263 */
270 264
271 265 /* BYTE 2 */
272 266 tmp0 = base1[2];
273 267 j = j + tmp0;
274 268 tmp1 = base[j];
275 269 base1[2] = (uchar_t)tmp1;
276 270 base[j] = (uchar_t)tmp0;
277 271 tmp1 += tmp0;
278 272 tmp1 = tmp1 & 0xff;
279 273 merge |= (unsigned long long)(base[tmp1]) << 40;
280 274
281 275 /* BYTE 3 */
282 276 tmp0 = base1[3];
283 277 j = j + tmp0;
284 278 tmp1 = base[j];
285 279 base1[3] = (uchar_t)tmp1;
286 280 base[j] = (uchar_t)tmp0;
287 281 tmp0 += tmp1;
288 282 tmp0 = tmp0 & 0xff;
289 283 merge |= (unsigned long long)(base[tmp0]) << 32;
290 284
291 285 /* BYTE 4 */
292 286 tmp0 = base1[4];
293 287 j = j + tmp0;
294 288 tmp1 = base[j];
295 289 base1[4] = (uchar_t)tmp1;
296 290 base[j] = (uchar_t)tmp0;
297 291 tmp0 += tmp1;
298 292 tmp0 = tmp0 & 0xff;
299 293 merge |= (unsigned long long)(base[tmp0]) << 24;
300 294
301 295 /* BYTE 5 */
302 296 tmp0 = base1[5];
303 297 j = j + tmp0;
304 298 tmp1 = base[j];
305 299 base1[5] = (uchar_t)tmp1;
306 300 base[j] = (uchar_t)tmp0;
307 301 tmp0 += tmp1;
308 302 tmp0 = tmp0 & 0xff;
309 303 merge |= (unsigned long long)(base[tmp0]) << 16;
310 304
311 305 /* BYTE 6 */
312 306 i1 = (i1+6);
313 307 tmp0 = base1[6];
314 308 j = j + tmp0;
315 309 tmp1 = base[j];
316 310 i_accum = tmp1;
317 311 base[j] = (uchar_t)tmp0;
318 312
319 313 tmp0 += tmp1;
320 314 tmp0 = tmp0 & 0xff;
321 315
322 316 if (i1 == tmp0) {
323 317 merge |=
324 318 (unsigned long long)(i_accum) << 8;
325 319 } else {
326 320 merge |=
327 321 (unsigned long long)(base[tmp0]) <<
328 322 8;
329 323 }
330 324
331 325 /* BYTE 7 */
332 326 tmp0 = base1[7];
333 327
334 328 /*
 335 329 * Perform [i] speculation again. Identical
336 330 * to that performed for BYTE0 and BYTE1.
337 331 */
338 332 j = j + tmp0;
339 333 if ((i1 ^ j) < 2) {
340 334 base1[6] = (uchar_t)i_accum;
341 335 tmp1 = base[j];
342 336
343 337 base1[7] = (uchar_t)tmp1;
344 338 base[j] = (uchar_t)tmp0;
345 339
346 340 tmp0 += tmp1;
347 341 tmp0 = tmp0 & 0xff;
348 342
349 343 merge |=
350 344 (unsigned long long)(base[tmp0]);
351 345
352 346 } else {
353 347 tmp1 = base[j];
354 348
355 349 i_accum = i_accum << 8;
356 350 i_accum |= tmp1;
357 351
358 352 base[j] = (uchar_t)tmp0;
359 353
360 354 tmp0 += tmp1;
361 355 tmp0 = tmp0 & 0xff;
362 356
363 357 /* LINTED E_BAD_PTR_CAST_ALIGN */
364 358 *((unsigned short *) &base[i1]) =
365 359 i_accum;
366 360
367 361 merge |=
368 362 (unsigned long long)(base[tmp0]);
369 363 }
370 364 i1++;
371 365 } else {
372 366 /*
373 367 * i is too close to wrap-around to allow
374 368 * masking to be disregarded
375 369 */
376 370
377 371 /*
378 372 * Same old speculation for BYTE 0 and BYTE 1
379 373 */
380 374
381 375 /* BYTE 0 */
382 376 i1 = (i1 + 1) & 0xff;
383 377 jj = (uchar_t)i1;
384 378
385 379 tmp0 = base[i1];
386 380 j = j + tmp0;
387 381
388 382 tmp1 = base[j];
389 383 i_accum = tmp1;
390 384 base[j] = (uchar_t)tmp0;
391 385
392 386 tmp0 += tmp1;
393 387 tmp0 = tmp0 & 0xff;
394 388
395 389 if (i1 == tmp0) {
396 390 merge =
397 391 (unsigned long long)(i_accum) << 56;
398 392 } else {
399 393 merge =
400 394 (unsigned long long)(base[tmp0]) <<
401 395 56;
402 396 }
403 397
404 398 /* BYTE 1 */
405 399 tmp0 = base[i1+1];
406 400
407 401 j = j + tmp0;
408 402
409 403 if ((jj ^ j) < 2) {
410 404 base[jj] = (uchar_t)i_accum;
411 405
412 406 tmp1 = base[j];
413 407
414 408 base[i1+1] = (uchar_t)tmp1;
415 409 base[j] = (uchar_t)tmp0;
416 410
417 411 tmp0 += tmp1;
418 412 tmp0 = tmp0 & 0xff;
419 413
420 414 merge |=
421 415 (unsigned long long)(base[tmp0]) <<
422 416 48;
423 417 } else {
424 418
425 419 tmp1 = base[j];
426 420
427 421 i_accum = i_accum << 8;
428 422 i_accum |= tmp1;
429 423
430 424 base[j] = (uchar_t)tmp0;
431 425
432 426 tmp0 += tmp1;
433 427 tmp0 = tmp0 & 0xff;
434 428
435 429 /* LINTED E_BAD_PTR_CAST_ALIGN */
436 430 *((unsigned short *) &base[jj]) =
437 431 i_accum;
438 432
439 433 merge |=
440 434 (unsigned long long)(base[tmp0]) <<
441 435 48;
442 436 }
443 437
444 438 /* BYTE 2 */
445 439 /*
446 440 * As know i must be even when enter loop (to
447 441 * satisfy alignment), can only wrap around
448 442 * on the even bytes. So just need to perform
449 443 * mask every 2nd byte
450 444 */
451 445 i1 = (i1 + 2) & 0xff;
452 446 tmp0 = base[i1];
453 447 j = j + tmp0;
454 448 tmp1 = base[j];
455 449 base[i1] = (uchar_t)tmp1;
456 450 base[j] = (uchar_t)tmp0;
457 451 tmp0 += tmp1;
458 452 tmp0 = tmp0 & 0xff;
459 453 merge |= (unsigned long long)(base[tmp0]) << 40;
460 454
461 455 /* BYTE 3 */
462 456 tmp0 = base[i1+1];
463 457 j = j + tmp0;
464 458 tmp1 = base[j];
465 459 base[i1+1] = (uchar_t)tmp1;
466 460 base[j] = (uchar_t)tmp0;
467 461 tmp0 += tmp1;
468 462 tmp0 = tmp0 & 0xff;
469 463 merge |= (unsigned long long)(base[tmp0]) << 32;
470 464
471 465 /* BYTE 4 */
472 466 i1 = (i1 + 2) & 0xff;
473 467 tmp0 = base[i1];
474 468 j = j + tmp0;
475 469 tmp1 = base[j];
476 470 base[i1] = (uchar_t)tmp1;
477 471 base[j] = (uchar_t)tmp0;
478 472 tmp0 += tmp1;
479 473 tmp0 = tmp0 & 0xff;
480 474 merge |= (unsigned long long)(base[tmp0]) << 24;
481 475
482 476 /* BYTE 5 */
483 477 tmp0 = base[i1+1];
484 478 j = j + tmp0;
485 479 tmp1 = base[j];
486 480 base[i1+1] = (uchar_t)tmp1;
487 481 base[j] = (uchar_t)tmp0;
488 482 tmp0 += tmp1;
489 483 tmp0 = tmp0 & 0xff;
490 484 merge |= (unsigned long long)(base[tmp0]) << 16;
491 485
492 486 /* BYTE 6 */
493 487 i1 = (i1+2) &0xff;
494 488 jj = (uchar_t)i1;
495 489 tmp0 = base[i1];
496 490
497 491 j = j + tmp0;
498 492
499 493 tmp1 = base[j];
500 494 i_accum = tmp1;
501 495 base[j] = (uchar_t)tmp0;
502 496
503 497
504 498 tmp0 += tmp1;
505 499 tmp0 = tmp0 & 0xff;
506 500
507 501 if (i1 == tmp0) {
508 502 merge |=
509 503 (unsigned long long)(i_accum) << 8;
510 504 } else {
511 505 merge |=
512 506 (unsigned long long)(base[tmp0]) <<
513 507 8;
514 508 }
515 509
516 510 /* BYTE 7 */
517 511 i1++;
518 512 tmp0 = base[i1];
519 513
520 514 j = j + tmp0;
521 515 if ((jj ^ j) < 2) {
522 516 base[jj] = (uchar_t)i_accum;
523 517 tmp1 = base[j];
524 518
525 519 base[i1] = (uchar_t)tmp1;
526 520 base[j] = (uchar_t)tmp0;
527 521
528 522 tmp0 += tmp1;
529 523 tmp0 = tmp0 & 0xff;
530 524
531 525 merge |=
532 526 (unsigned long long)(base[tmp0]);
533 527
534 528 } else {
535 529
536 530 tmp1 = base[j];
537 531
538 532 i_accum = i_accum << 8;
539 533 i_accum |= tmp1;
540 534
541 535 base[j] = (uchar_t)tmp0;
542 536
543 537 tmp0 += tmp1;
544 538 tmp0 = tmp0 & 0xff;
545 539
546 540 /* LINTED E_BAD_PTR_CAST_ALIGN */
547 541 *((unsigned short *) &base[jj]) =
548 542 i_accum;
549 543
550 544 merge |=
551 545 (unsigned long long)(base[tmp0]);
552 546 }
553 547 }
554 548
555 549 /*
556 550 * Perform update to [out]
557 551 * Remember could be alignment issues
558 552 */
559 553 /* LINTED E_BAD_PTR_CAST_ALIGN */
560 554 in0 = *((unsigned long long *) (&in[ii]));
561 555
562 556 merge1 = merge0 | (merge >> shift);
563 557
564 558 merge0 = (merge & mask) << 56;
565 559
566 560 in0 = in0 ^ merge1;
567 561
568 562 /* LINTED E_BAD_PTR_CAST_ALIGN */
569 563 *((unsigned long long *) (&out[ii])) = in0;
570 564 }
571 565
572 566 i = (uchar_t)i1;
573 567
574 568 /*
575 569 * Handle any overrun
576 570 */
577 571 if (shift) {
578 572 out[ii] = in[ii] ^ (merge0 >> 56);
579 573 ii++;
580 574 }
581 575
582 576 /*
583 577 * Handle final few bytes
584 578 */
585 579 for (; ii < len; ii++) {
586 580 i = i + 1;
587 581 tmp0 = base[i];
588 582 j = j + tmp0;
589 583 tmp1 = base[j];
590 584
591 585 base[i] = (uchar_t)tmp1;
↓ open down ↓ |
507 lines elided |
↑ open up ↑ |
592 586 base[j] = (uchar_t)tmp0;
593 587
594 588 tmp0 += tmp1;
595 589 tmp0 = tmp0 & 0xff;
596 590 out[ii] = in[ii] ^ base[tmp0];
597 591 }
598 592 key->i = i;
599 593 key->j = j;
600 594 }
601 595 #endif /* sun4v */
602 -
603 -/* EXPORT DELETE END */
604 596 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX