Print this page
patch as-lock-macro-simplification
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/xen/io/xpvtap.c
+++ new/usr/src/uts/common/xen/io/xpvtap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27
28 28 #include <sys/errno.h>
29 29 #include <sys/types.h>
30 30 #include <sys/conf.h>
31 31 #include <sys/kmem.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/stat.h>
34 34 #include <sys/sunddi.h>
35 35 #include <sys/file.h>
36 36 #include <sys/open.h>
37 37 #include <sys/modctl.h>
38 38 #include <sys/ddi_impldefs.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/ddidevmap.h>
41 41 #include <sys/policy.h>
42 42
43 43 #include <sys/vmsystm.h>
44 44 #include <vm/hat_i86.h>
45 45 #include <vm/hat_pte.h>
46 46 #include <vm/seg_kmem.h>
47 47 #include <vm/seg_mf.h>
48 48
49 49 #include <xen/io/blkif_impl.h>
50 50 #include <xen/io/blk_common.h>
51 51 #include <xen/io/xpvtap.h>
52 52
53 53
54 54 static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
55 55 static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
56 56 static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
57 57 cred_t *cred, int *rval);
58 58 static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
59 59 size_t len, size_t *maplen, uint_t model);
60 60 static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
61 61 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
62 62 cred_t *cred_p);
63 63 static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
64 64 struct pollhead **phpp);
65 65
66 66 static struct cb_ops xpvtap_cb_ops = {
67 67 xpvtap_open, /* cb_open */
68 68 xpvtap_close, /* cb_close */
69 69 nodev, /* cb_strategy */
70 70 nodev, /* cb_print */
71 71 nodev, /* cb_dump */
72 72 nodev, /* cb_read */
73 73 nodev, /* cb_write */
74 74 xpvtap_ioctl, /* cb_ioctl */
75 75 xpvtap_devmap, /* cb_devmap */
76 76 nodev, /* cb_mmap */
77 77 xpvtap_segmap, /* cb_segmap */
78 78 xpvtap_chpoll, /* cb_chpoll */
79 79 ddi_prop_op, /* cb_prop_op */
80 80 NULL, /* cb_stream */
81 81 D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */
82 82 CB_REV
83 83 };
84 84
85 85 static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
86 86 void **result);
87 87 static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
88 88 static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
89 89
90 90 static struct dev_ops xpvtap_dev_ops = {
91 91 DEVO_REV, /* devo_rev */
92 92 0, /* devo_refcnt */
93 93 xpvtap_getinfo, /* devo_getinfo */
94 94 nulldev, /* devo_identify */
95 95 nulldev, /* devo_probe */
96 96 xpvtap_attach, /* devo_attach */
97 97 xpvtap_detach, /* devo_detach */
98 98 nodev, /* devo_reset */
99 99 &xpvtap_cb_ops, /* devo_cb_ops */
100 100 NULL, /* devo_bus_ops */
101 101 NULL /* power */
102 102 };
103 103
104 104
105 105 static struct modldrv xpvtap_modldrv = {
106 106 &mod_driverops, /* Type of module. This one is a driver */
107 107 "xpvtap driver", /* Name of the module. */
108 108 &xpvtap_dev_ops, /* driver ops */
109 109 };
110 110
111 111 static struct modlinkage xpvtap_modlinkage = {
112 112 MODREV_1,
113 113 (void *) &xpvtap_modldrv,
114 114 NULL
115 115 };
116 116
117 117
118 118 void *xpvtap_statep;
119 119
120 120
121 121 static xpvtap_state_t *xpvtap_drv_init(int instance);
122 122 static void xpvtap_drv_fini(xpvtap_state_t *state);
123 123 static uint_t xpvtap_intr(caddr_t arg);
124 124
125 125 typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
126 126 static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
127 127 xpvtap_rs_hdl_t *handle);
128 128 static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
129 129 static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
130 130 static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
131 131 static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
132 132 xpvtap_rs_cleanup_t callback, void *arg);
133 133
134 134 static int xpvtap_segmf_register(xpvtap_state_t *state);
135 135 static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);
136 136
137 137 static int xpvtap_user_init(xpvtap_state_t *state);
138 138 static void xpvtap_user_fini(xpvtap_state_t *state);
139 139 static int xpvtap_user_ring_init(xpvtap_state_t *state);
140 140 static void xpvtap_user_ring_fini(xpvtap_state_t *state);
141 141 static int xpvtap_user_thread_init(xpvtap_state_t *state);
142 142 static void xpvtap_user_thread_fini(xpvtap_state_t *state);
143 143 static void xpvtap_user_thread_start(caddr_t arg);
144 144 static void xpvtap_user_thread_stop(xpvtap_state_t *state);
145 145 static void xpvtap_user_thread(void *arg);
146 146
147 147 static void xpvtap_user_app_stop(caddr_t arg);
148 148
149 149 static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
150 150 uint_t *uid);
151 151 static int xpvtap_user_request_push(xpvtap_state_t *state,
152 152 blkif_request_t *req, uint_t uid);
153 153 static int xpvtap_user_response_get(xpvtap_state_t *state,
154 154 blkif_response_t *resp, uint_t *uid);
155 155 static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
156 156
157 157
158 158 /*
159 159 * _init()
160 160 */
161 161 int
162 162 _init(void)
163 163 {
164 164 int e;
165 165
166 166 e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
167 167 if (e != 0) {
168 168 return (e);
169 169 }
170 170
171 171 e = mod_install(&xpvtap_modlinkage);
172 172 if (e != 0) {
173 173 ddi_soft_state_fini(&xpvtap_statep);
174 174 return (e);
175 175 }
176 176
177 177 return (0);
178 178 }
179 179
180 180
181 181 /*
182 182 * _info()
183 183 */
184 184 int
185 185 _info(struct modinfo *modinfop)
186 186 {
187 187 return (mod_info(&xpvtap_modlinkage, modinfop));
188 188 }
189 189
190 190
191 191 /*
192 192 * _fini()
193 193 */
194 194 int
195 195 _fini(void)
196 196 {
197 197 int e;
198 198
199 199 e = mod_remove(&xpvtap_modlinkage);
200 200 if (e != 0) {
201 201 return (e);
202 202 }
203 203
204 204 ddi_soft_state_fini(&xpvtap_statep);
205 205
206 206 return (0);
207 207 }
208 208
209 209
210 210 /*
211 211 * xpvtap_attach()
212 212 */
213 213 static int
214 214 xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
215 215 {
216 216 blk_ringinit_args_t args;
217 217 xpvtap_state_t *state;
218 218 int instance;
219 219 int e;
220 220
221 221
222 222 switch (cmd) {
223 223 case DDI_ATTACH:
224 224 break;
225 225
226 226 case DDI_RESUME:
227 227 return (DDI_SUCCESS);
228 228
229 229 default:
230 230 return (DDI_FAILURE);
231 231 }
232 232
233 233 /* initialize our state info */
234 234 instance = ddi_get_instance(dip);
235 235 state = xpvtap_drv_init(instance);
236 236 if (state == NULL) {
237 237 return (DDI_FAILURE);
238 238 }
239 239 state->bt_dip = dip;
240 240
241 241 /* Initialize the guest ring */
242 242 args.ar_dip = state->bt_dip;
243 243 args.ar_intr = xpvtap_intr;
244 244 args.ar_intr_arg = (caddr_t)state;
245 245 args.ar_ringup = xpvtap_user_thread_start;
246 246 args.ar_ringup_arg = (caddr_t)state;
247 247 args.ar_ringdown = xpvtap_user_app_stop;
248 248 args.ar_ringdown_arg = (caddr_t)state;
249 249 e = blk_ring_init(&args, &state->bt_guest_ring);
250 250 if (e != DDI_SUCCESS) {
251 251 goto attachfail_ringinit;
252 252 }
253 253
254 254 /* create the minor node (for ioctl/mmap) */
255 255 e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
256 256 DDI_PSEUDO, 0);
257 257 if (e != DDI_SUCCESS) {
258 258 goto attachfail_minor_node;
259 259 }
260 260
261 261 /* Report that driver was loaded */
262 262 ddi_report_dev(dip);
263 263
264 264 return (DDI_SUCCESS);
265 265
266 266 attachfail_minor_node:
267 267 blk_ring_fini(&state->bt_guest_ring);
268 268 attachfail_ringinit:
269 269 xpvtap_drv_fini(state);
270 270 return (DDI_FAILURE);
271 271 }
272 272
273 273
274 274 /*
275 275 * xpvtap_detach()
276 276 */
277 277 static int
278 278 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
279 279 {
280 280 xpvtap_state_t *state;
281 281 int instance;
282 282
283 283
284 284 instance = ddi_get_instance(dip);
285 285 state = ddi_get_soft_state(xpvtap_statep, instance);
286 286 if (state == NULL) {
287 287 return (DDI_FAILURE);
288 288 }
289 289
290 290 switch (cmd) {
291 291 case DDI_DETACH:
292 292 break;
293 293
294 294 case DDI_SUSPEND:
295 295 default:
296 296 return (DDI_FAILURE);
297 297 }
298 298
299 299 xpvtap_user_thread_stop(state);
300 300 blk_ring_fini(&state->bt_guest_ring);
301 301 xpvtap_drv_fini(state);
302 302 ddi_remove_minor_node(dip, NULL);
303 303
304 304 return (DDI_SUCCESS);
305 305 }
306 306
307 307
308 308 /*
309 309 * xpvtap_getinfo()
310 310 */
311 311 /*ARGSUSED*/
312 312 static int
313 313 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
314 314 {
315 315 xpvtap_state_t *state;
316 316 int instance;
317 317 dev_t dev;
318 318 int e;
319 319
320 320
321 321 dev = (dev_t)arg;
322 322 instance = getminor(dev);
323 323
324 324 switch (cmd) {
325 325 case DDI_INFO_DEVT2DEVINFO:
326 326 state = ddi_get_soft_state(xpvtap_statep, instance);
327 327 if (state == NULL) {
328 328 return (DDI_FAILURE);
329 329 }
330 330 *result = (void *)state->bt_dip;
331 331 e = DDI_SUCCESS;
332 332 break;
333 333
334 334 case DDI_INFO_DEVT2INSTANCE:
335 335 *result = (void *)(uintptr_t)instance;
336 336 e = DDI_SUCCESS;
337 337 break;
338 338
339 339 default:
340 340 e = DDI_FAILURE;
341 341 break;
342 342 }
343 343
344 344 return (e);
345 345 }
346 346
347 347
348 348 /*
349 349 * xpvtap_open()
350 350 */
351 351 /*ARGSUSED*/
352 352 static int
353 353 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
354 354 {
355 355 xpvtap_state_t *state;
356 356 int instance;
357 357
358 358
359 359 if (secpolicy_xvm_control(cred)) {
360 360 return (EPERM);
361 361 }
362 362
363 363 instance = getminor(*devp);
364 364 state = ddi_get_soft_state(xpvtap_statep, instance);
365 365 if (state == NULL) {
366 366 return (ENXIO);
367 367 }
368 368
369 369 /* we should only be opened once */
370 370 mutex_enter(&state->bt_open.bo_mutex);
371 371 if (state->bt_open.bo_opened) {
372 372 mutex_exit(&state->bt_open.bo_mutex);
373 373 return (EBUSY);
374 374 }
375 375 state->bt_open.bo_opened = B_TRUE;
376 376 mutex_exit(&state->bt_open.bo_mutex);
377 377
378 378 /*
379 379 * save the apps address space. need it for mapping/unmapping grefs
380 380 * since will be doing it in a separate kernel thread.
381 381 */
382 382 state->bt_map.um_as = curproc->p_as;
383 383
384 384 return (0);
385 385 }
386 386
387 387
388 388 /*
389 389 * xpvtap_close()
390 390 */
391 391 /*ARGSUSED*/
392 392 static int
393 393 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
394 394 {
395 395 xpvtap_state_t *state;
396 396 int instance;
397 397
398 398
399 399 instance = getminor(devp);
400 400 state = ddi_get_soft_state(xpvtap_statep, instance);
401 401 if (state == NULL) {
402 402 return (ENXIO);
403 403 }
404 404
405 405 /*
406 406 * wake thread so it can cleanup and wait for it to exit so we can
407 407 * be sure it's not in the middle of processing a request/response.
408 408 */
409 409 mutex_enter(&state->bt_thread.ut_mutex);
410 410 state->bt_thread.ut_wake = B_TRUE;
411 411 state->bt_thread.ut_exit = B_TRUE;
412 412 cv_signal(&state->bt_thread.ut_wake_cv);
413 413 if (!state->bt_thread.ut_exit_done) {
414 414 cv_wait(&state->bt_thread.ut_exit_done_cv,
415 415 &state->bt_thread.ut_mutex);
416 416 }
417 417 ASSERT(state->bt_thread.ut_exit_done);
418 418 mutex_exit(&state->bt_thread.ut_mutex);
419 419
420 420 state->bt_map.um_as = NULL;
421 421 state->bt_map.um_guest_pages = NULL;
422 422
423 423 /*
424 424 * when the ring is brought down, a userland hotplug script is run
425 425 * which tries to bring the userland app down. We'll wait for a bit
426 426 * for the user app to exit. Notify the thread waiting that the app
427 427 * has closed the driver.
428 428 */
429 429 mutex_enter(&state->bt_open.bo_mutex);
430 430 ASSERT(state->bt_open.bo_opened);
431 431 state->bt_open.bo_opened = B_FALSE;
432 432 cv_signal(&state->bt_open.bo_exit_cv);
433 433 mutex_exit(&state->bt_open.bo_mutex);
434 434
435 435 return (0);
436 436 }
437 437
438 438
439 439 /*
440 440 * xpvtap_ioctl()
441 441 */
442 442 /*ARGSUSED*/
443 443 static int
444 444 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
445 445 int *rval)
446 446 {
447 447 xpvtap_state_t *state;
448 448 int instance;
449 449
450 450
451 451 if (secpolicy_xvm_control(cred)) {
452 452 return (EPERM);
453 453 }
454 454
455 455 instance = getminor(dev);
456 456 if (instance == -1) {
457 457 return (EBADF);
458 458 }
459 459
460 460 state = ddi_get_soft_state(xpvtap_statep, instance);
461 461 if (state == NULL) {
462 462 return (EBADF);
463 463 }
464 464
465 465 switch (cmd) {
466 466 case XPVTAP_IOCTL_RESP_PUSH:
467 467 /*
468 468 * wake thread, thread handles guest requests and user app
469 469 * responses.
470 470 */
471 471 mutex_enter(&state->bt_thread.ut_mutex);
472 472 state->bt_thread.ut_wake = B_TRUE;
473 473 cv_signal(&state->bt_thread.ut_wake_cv);
474 474 mutex_exit(&state->bt_thread.ut_mutex);
475 475 break;
476 476
477 477 default:
478 478 cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
479 479 return (ENXIO);
480 480 }
481 481
482 482 return (0);
483 483 }
484 484
485 485
486 486 /*
487 487 * xpvtap_segmap()
488 488 */
489 489 /*ARGSUSED*/
490 490 static int
491 491 xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
492 492 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
493 493 cred_t *cred_p)
494 494 {
495 495 struct segmf_crargs a;
496 496 xpvtap_state_t *state;
497 497 int instance;
498 498 int e;
499 499
500 500
501 501 if (secpolicy_xvm_control(cred_p)) {
502 502 return (EPERM);
503 503 }
504 504
505 505 instance = getminor(dev);
506 506 state = ddi_get_soft_state(xpvtap_statep, instance);
507 507 if (state == NULL) {
508 508 return (EBADF);
509 509 }
510 510
511 511 /* the user app should be doing a MAP_SHARED mapping */
512 512 if ((flags & MAP_TYPE) != MAP_SHARED) {
513 513 return (EINVAL);
514 514 }
515 515
516 516 /*
517 517 * if this is the user ring (offset = 0), devmap it (which ends up in
518 518 * xpvtap_devmap). devmap will alloc and map the ring into the
519 519 * app's VA space.
520 520 */
521 521 if (off == 0) {
522 522 e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
523 523 prot, maxprot, flags, cred_p);
524 524 return (e);
525 525 }
526 526
527 527 /* this should be the mmap for the gref pages (offset = PAGESIZE) */
528 528 if (off != PAGESIZE) {
529 529 return (EINVAL);
530 530 }
531 531
532 532 /* make sure we get the size we're expecting */
533 533 if (len != XPVTAP_GREF_BUFSIZE) {
534 534 return (EINVAL);
535 535 }
536 536
537 537 /*
538 538 * reserve user app VA space for the gref pages and use segmf to
539 539 * manage the backing store for the physical memory. segmf will
540 540 * map in/out the grefs and fault them in/out.
541 541 */
542 542 ASSERT(asp == state->bt_map.um_as);
543 543 as_rangelock(asp);
544 544 if ((flags & MAP_FIXED) == 0) {
545 545 map_addr(addrp, len, 0, 0, flags);
546 546 if (*addrp == NULL) {
547 547 as_rangeunlock(asp);
548 548 return (ENOMEM);
549 549 }
550 550 } else {
551 551 /* User specified address */
552 552 (void) as_unmap(asp, *addrp, len);
553 553 }
554 554 a.dev = dev;
555 555 a.prot = (uchar_t)prot;
556 556 a.maxprot = (uchar_t)maxprot;
557 557 e = as_map(asp, *addrp, len, segmf_create, &a);
558 558 if (e != 0) {
559 559 as_rangeunlock(asp);
560 560 return (e);
561 561 }
562 562 as_rangeunlock(asp);
563 563
564 564 /*
565 565 * Stash user base address, and compute address where the request
566 566 * array will end up.
567 567 */
568 568 state->bt_map.um_guest_pages = (caddr_t)*addrp;
569 569 state->bt_map.um_guest_size = (size_t)len;
570 570
571 571 /* register an as callback so we can cleanup when the app goes away */
572 572 e = as_add_callback(asp, xpvtap_segmf_unregister, state,
573 573 AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
574 574 if (e != 0) {
575 575 (void) as_unmap(asp, *addrp, len);
576 576 return (EINVAL);
577 577 }
578 578
579 579 /* wake thread to see if there are requests already queued up */
580 580 mutex_enter(&state->bt_thread.ut_mutex);
581 581 state->bt_thread.ut_wake = B_TRUE;
582 582 cv_signal(&state->bt_thread.ut_wake_cv);
583 583 mutex_exit(&state->bt_thread.ut_mutex);
584 584
585 585 return (0);
586 586 }
587 587
588 588
589 589 /*
590 590 * xpvtap_devmap()
591 591 */
592 592 /*ARGSUSED*/
593 593 static int
594 594 xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
595 595 size_t *maplen, uint_t model)
596 596 {
597 597 xpvtap_user_ring_t *usring;
598 598 xpvtap_state_t *state;
599 599 int instance;
600 600 int e;
601 601
602 602
603 603 instance = getminor(dev);
604 604 state = ddi_get_soft_state(xpvtap_statep, instance);
605 605 if (state == NULL) {
606 606 return (EBADF);
607 607 }
608 608
609 609 /* we should only get here if the offset was == 0 */
610 610 if (off != 0) {
611 611 return (EINVAL);
612 612 }
613 613
614 614 /* we should only be mapping in one page */
615 615 if (len != PAGESIZE) {
616 616 return (EINVAL);
617 617 }
618 618
619 619 /*
620 620 * we already allocated the user ring during driver attach, all we
621 621 * need to do is map it into the user app's VA.
622 622 */
623 623 usring = &state->bt_user_ring;
624 624 e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
625 625 PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
626 626 if (e < 0) {
627 627 return (e);
628 628 }
629 629
630 630 /* return the size to compete the devmap */
631 631 *maplen = PAGESIZE;
632 632
633 633 return (0);
634 634 }
635 635
636 636
637 637 /*
638 638 * xpvtap_chpoll()
639 639 */
640 640 static int
641 641 xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
642 642 struct pollhead **phpp)
643 643 {
644 644 xpvtap_user_ring_t *usring;
645 645 xpvtap_state_t *state;
646 646 int instance;
647 647
648 648
649 649 instance = getminor(dev);
650 650 if (instance == -1) {
651 651 return (EBADF);
652 652 }
653 653 state = ddi_get_soft_state(xpvtap_statep, instance);
654 654 if (state == NULL) {
655 655 return (EBADF);
656 656 }
657 657
658 658 if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
659 659 *reventsp = 0;
660 660 return (EINVAL);
661 661 }
662 662
663 663 /*
664 664 * if we pushed requests on the user ring since the last poll, wakeup
665 665 * the user app
666 666 */
667 667 usring = &state->bt_user_ring;
668 668 if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {
669 669
670 670 /*
671 671 * XXX - is this faster here or xpvtap_user_request_push??
672 672 * prelim data says here. Because less membars or because
673 673 * user thread will spin in poll requests before getting to
674 674 * responses?
675 675 */
676 676 RING_PUSH_REQUESTS(&usring->ur_ring);
677 677
678 678 usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
679 679 *reventsp = POLLIN | POLLRDNORM;
680 680
681 681 /* no new requests */
682 682 } else {
683 683 *reventsp = 0;
684 684 if (!anyyet) {
685 685 *phpp = &state->bt_pollhead;
686 686 }
687 687 }
688 688
689 689 return (0);
690 690 }
691 691
692 692
693 693 /*
694 694 * xpvtap_drv_init()
695 695 */
696 696 static xpvtap_state_t *
697 697 xpvtap_drv_init(int instance)
698 698 {
699 699 xpvtap_state_t *state;
700 700 int e;
701 701
702 702
703 703 e = ddi_soft_state_zalloc(xpvtap_statep, instance);
704 704 if (e != DDI_SUCCESS) {
705 705 return (NULL);
706 706 }
707 707 state = ddi_get_soft_state(xpvtap_statep, instance);
708 708 if (state == NULL) {
709 709 goto drvinitfail_get_soft_state;
710 710 }
711 711
712 712 state->bt_instance = instance;
713 713 mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
714 714 cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
715 715 state->bt_open.bo_opened = B_FALSE;
716 716 state->bt_map.um_registered = B_FALSE;
717 717
718 718 /* initialize user ring, thread, mapping state */
719 719 e = xpvtap_user_init(state);
720 720 if (e != DDI_SUCCESS) {
721 721 goto drvinitfail_userinit;
722 722 }
723 723
724 724 return (state);
725 725
726 726 drvinitfail_userinit:
727 727 cv_destroy(&state->bt_open.bo_exit_cv);
728 728 mutex_destroy(&state->bt_open.bo_mutex);
729 729 drvinitfail_get_soft_state:
730 730 (void) ddi_soft_state_free(xpvtap_statep, instance);
731 731 return (NULL);
732 732 }
733 733
734 734
735 735 /*
736 736 * xpvtap_drv_fini()
737 737 */
738 738 static void
739 739 xpvtap_drv_fini(xpvtap_state_t *state)
740 740 {
741 741 xpvtap_user_fini(state);
742 742 cv_destroy(&state->bt_open.bo_exit_cv);
743 743 mutex_destroy(&state->bt_open.bo_mutex);
744 744 (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
745 745 }
746 746
747 747
748 748 /*
749 749 * xpvtap_intr()
750 750 * this routine will be called when we have a request on the guest ring.
751 751 */
752 752 static uint_t
753 753 xpvtap_intr(caddr_t arg)
754 754 {
755 755 xpvtap_state_t *state;
756 756
757 757
758 758 state = (xpvtap_state_t *)arg;
759 759
760 760 /* wake thread, thread handles guest requests and user app responses */
761 761 mutex_enter(&state->bt_thread.ut_mutex);
762 762 state->bt_thread.ut_wake = B_TRUE;
763 763 cv_signal(&state->bt_thread.ut_wake_cv);
764 764 mutex_exit(&state->bt_thread.ut_mutex);
765 765
766 766 return (DDI_INTR_CLAIMED);
767 767 }
768 768
769 769
770 770 /*
771 771 * xpvtap_segmf_register()
772 772 */
773 773 static int
774 774 xpvtap_segmf_register(xpvtap_state_t *state)
775 775 {
776 776 struct seg *seg;
777 777 uint64_t pte_ma;
778 778 struct as *as;
779 779 caddr_t uaddr;
780 780 uint_t pgcnt;
781 781 int i;
↓ open down ↓ |
781 lines elided |
↑ open up ↑ |
782 782
783 783
784 784 as = state->bt_map.um_as;
785 785 pgcnt = btopr(state->bt_map.um_guest_size);
786 786 uaddr = state->bt_map.um_guest_pages;
787 787
788 788 if (pgcnt == 0) {
789 789 return (DDI_FAILURE);
790 790 }
791 791
792 - AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
792 + AS_LOCK_ENTER(as, RW_READER);
793 793
794 794 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
795 795 if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
796 796 (seg->s_base + seg->s_size))) {
797 - AS_LOCK_EXIT(as, &as->a_lock);
797 + AS_LOCK_EXIT(as);
798 798 return (DDI_FAILURE);
799 799 }
800 800
801 801 /*
802 802 * lock down the htables so the HAT can't steal them. Register the
803 803 * PTE MA's for each gref page with seg_mf so we can do user space
804 804 * gref mappings.
805 805 */
806 806 for (i = 0; i < pgcnt; i++) {
807 807 hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
808 808 hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
809 809 PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
810 810 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
811 811 hat_release_mapping(as->a_hat, uaddr);
812 812 segmf_add_gref_pte(seg, uaddr, pte_ma);
813 813 uaddr += PAGESIZE;
814 814 }
815 815
816 816 state->bt_map.um_registered = B_TRUE;
817 817
818 - AS_LOCK_EXIT(as, &as->a_lock);
818 + AS_LOCK_EXIT(as);
819 819
820 820 return (DDI_SUCCESS);
821 821 }
822 822
823 823
824 824 /*
825 825 * xpvtap_segmf_unregister()
826 826 * as_callback routine
827 827 */
828 828 /*ARGSUSED*/
829 829 static void
830 830 xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
831 831 {
832 832 xpvtap_state_t *state;
833 833 caddr_t uaddr;
834 834 uint_t pgcnt;
835 835 int i;
836 836
837 837
838 838 state = (xpvtap_state_t *)arg;
839 839 if (!state->bt_map.um_registered) {
840 840 /* remove the callback (which is this routine) */
841 841 (void) as_delete_callback(as, arg);
842 842 return;
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
843 843 }
844 844
845 845 pgcnt = btopr(state->bt_map.um_guest_size);
846 846 uaddr = state->bt_map.um_guest_pages;
847 847
848 848 /* unmap any outstanding req's grefs */
849 849 xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);
850 850
851 851 /* Unlock the gref pages */
852 852 for (i = 0; i < pgcnt; i++) {
853 - AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
853 + AS_LOCK_ENTER(as, RW_WRITER);
854 854 hat_prepare_mapping(as->a_hat, uaddr, NULL);
855 855 hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
856 856 hat_release_mapping(as->a_hat, uaddr);
857 - AS_LOCK_EXIT(as, &as->a_lock);
857 + AS_LOCK_EXIT(as);
858 858 uaddr += PAGESIZE;
859 859 }
860 860
861 861 /* remove the callback (which is this routine) */
862 862 (void) as_delete_callback(as, arg);
863 863
864 864 state->bt_map.um_registered = B_FALSE;
865 865 }
866 866
867 867
868 868 /*
869 869 * xpvtap_user_init()
870 870 */
871 871 static int
872 872 xpvtap_user_init(xpvtap_state_t *state)
873 873 {
874 874 xpvtap_user_map_t *map;
875 875 int e;
876 876
877 877
878 878 map = &state->bt_map;
879 879
880 880 /* Setup the ring between the driver and user app */
881 881 e = xpvtap_user_ring_init(state);
882 882 if (e != DDI_SUCCESS) {
883 883 return (DDI_FAILURE);
884 884 }
885 885
886 886 /*
887 887 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
888 888 * is the same number of requests as the guest ring. Initialize the
889 889 * state we use to track request IDs to the user app. These IDs will
890 890 * also identify which group of gref pages correspond with the
891 891 * request.
892 892 */
893 893 xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);
894 894
895 895 /*
896 896 * allocate the space to store a copy of each outstanding requests. We
897 897 * will need to reference the ID and the number of segments when we
898 898 * get the response from the user app.
899 899 */
900 900 map->um_outstanding_reqs = kmem_zalloc(
901 901 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
902 902 KM_SLEEP);
903 903
904 904 /*
905 905 * initialize the thread we use to process guest requests and user
906 906 * responses.
907 907 */
908 908 e = xpvtap_user_thread_init(state);
909 909 if (e != DDI_SUCCESS) {
910 910 goto userinitfail_user_thread_init;
911 911 }
912 912
913 913 return (DDI_SUCCESS);
914 914
915 915 userinitfail_user_thread_init:
916 916 xpvtap_rs_fini(&map->um_rs);
917 917 kmem_free(map->um_outstanding_reqs,
918 918 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
919 919 xpvtap_user_ring_fini(state);
920 920 return (DDI_FAILURE);
921 921 }
922 922
923 923
924 924 /*
925 925 * xpvtap_user_ring_init()
926 926 */
927 927 static int
928 928 xpvtap_user_ring_init(xpvtap_state_t *state)
929 929 {
930 930 xpvtap_user_ring_t *usring;
931 931
932 932
933 933 usring = &state->bt_user_ring;
934 934
935 935 /* alocate and initialize the page for the shared user ring */
936 936 usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
937 937 DDI_UMEM_SLEEP, &usring->ur_cookie);
938 938 SHARED_RING_INIT(usring->ur_sring);
939 939 FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
940 940 usring->ur_prod_polled = 0;
941 941
942 942 return (DDI_SUCCESS);
943 943 }
944 944
945 945
946 946 /*
947 947 * xpvtap_user_thread_init()
948 948 */
949 949 static int
950 950 xpvtap_user_thread_init(xpvtap_state_t *state)
951 951 {
952 952 xpvtap_user_thread_t *thread;
953 953 char taskqname[32];
954 954
955 955
956 956 thread = &state->bt_thread;
957 957
958 958 mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
959 959 cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
960 960 cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
961 961 thread->ut_wake = B_FALSE;
962 962 thread->ut_exit = B_FALSE;
963 963 thread->ut_exit_done = B_TRUE;
964 964
965 965 /* create but don't start the user thread */
966 966 (void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
967 967 thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
968 968 TASKQ_DEFAULTPRI, 0);
969 969 if (thread->ut_taskq == NULL) {
970 970 goto userinitthrfail_taskq_create;
971 971 }
972 972
973 973 return (DDI_SUCCESS);
974 974
975 975 userinitthrfail_taskq_dispatch:
976 976 ddi_taskq_destroy(thread->ut_taskq);
977 977 userinitthrfail_taskq_create:
978 978 cv_destroy(&thread->ut_exit_done_cv);
979 979 cv_destroy(&thread->ut_wake_cv);
980 980 mutex_destroy(&thread->ut_mutex);
981 981
982 982 return (DDI_FAILURE);
983 983 }
984 984
985 985
986 986 /*
987 987 * xpvtap_user_thread_start()
988 988 */
989 989 static void
990 990 xpvtap_user_thread_start(caddr_t arg)
991 991 {
992 992 xpvtap_user_thread_t *thread;
993 993 xpvtap_state_t *state;
994 994 int e;
995 995
996 996
997 997 state = (xpvtap_state_t *)arg;
998 998 thread = &state->bt_thread;
999 999
1000 1000 /* start the user thread */
1001 1001 thread->ut_exit_done = B_FALSE;
1002 1002 e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
1003 1003 DDI_SLEEP);
1004 1004 if (e != DDI_SUCCESS) {
1005 1005 thread->ut_exit_done = B_TRUE;
1006 1006 cmn_err(CE_WARN, "Unable to start user thread\n");
1007 1007 }
1008 1008 }
1009 1009
1010 1010
1011 1011 /*
1012 1012 * xpvtap_user_thread_stop()
1013 1013 */
1014 1014 static void
1015 1015 xpvtap_user_thread_stop(xpvtap_state_t *state)
1016 1016 {
1017 1017 /* wake thread so it can exit */
1018 1018 mutex_enter(&state->bt_thread.ut_mutex);
1019 1019 state->bt_thread.ut_wake = B_TRUE;
1020 1020 state->bt_thread.ut_exit = B_TRUE;
1021 1021 cv_signal(&state->bt_thread.ut_wake_cv);
1022 1022 if (!state->bt_thread.ut_exit_done) {
1023 1023 cv_wait(&state->bt_thread.ut_exit_done_cv,
1024 1024 &state->bt_thread.ut_mutex);
1025 1025 }
1026 1026 mutex_exit(&state->bt_thread.ut_mutex);
1027 1027 ASSERT(state->bt_thread.ut_exit_done);
1028 1028 }
1029 1029
1030 1030
1031 1031 /*
1032 1032 * xpvtap_user_fini()
1033 1033 */
1034 1034 static void
1035 1035 xpvtap_user_fini(xpvtap_state_t *state)
1036 1036 {
1037 1037 xpvtap_user_map_t *map;
1038 1038
1039 1039
1040 1040 map = &state->bt_map;
1041 1041
1042 1042 xpvtap_user_thread_fini(state);
1043 1043 xpvtap_rs_fini(&map->um_rs);
1044 1044 kmem_free(map->um_outstanding_reqs,
1045 1045 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
1046 1046 xpvtap_user_ring_fini(state);
1047 1047 }
1048 1048
1049 1049
1050 1050 /*
1051 1051 * xpvtap_user_ring_fini()
1052 1052 */
1053 1053 static void
1054 1054 xpvtap_user_ring_fini(xpvtap_state_t *state)
1055 1055 {
1056 1056 ddi_umem_free(state->bt_user_ring.ur_cookie);
1057 1057 }
1058 1058
1059 1059
1060 1060 /*
1061 1061 * xpvtap_user_thread_fini()
1062 1062 */
1063 1063 static void
1064 1064 xpvtap_user_thread_fini(xpvtap_state_t *state)
1065 1065 {
1066 1066 ddi_taskq_destroy(state->bt_thread.ut_taskq);
1067 1067 cv_destroy(&state->bt_thread.ut_exit_done_cv);
1068 1068 cv_destroy(&state->bt_thread.ut_wake_cv);
1069 1069 mutex_destroy(&state->bt_thread.ut_mutex);
1070 1070 }
1071 1071
1072 1072
/*
 * xpvtap_user_thread()
 *    Worker thread (taskq).  Sleeps until woken, then shuttles requests
 *    from the guest ring to the user app's ring and responses back from
 *    the user app to the guest, looping until told to exit via ut_exit.
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		/* let xpvtap_user_thread_stop() know we are gone */
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		/* cv_wait_sig() returns 0 if interrupted by a signal */
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. user app hasn't
			 * opened the device yet), requeue it and try again
			 * later
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			/* couldn't push it; fail the request to the guest */
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
		/* NOTE(review): ut_exit is read here without ut_mutex */
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have any,
		 * break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}
1180 1180
1181 1181
1182 1182 /*
1183 1183 * xpvtap_user_request_map()
1184 1184 */
1185 1185 static int
1186 1186 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
1187 1187 uint_t *uid)
1188 1188 {
1189 1189 grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1190 1190 struct seg *seg;
1191 1191 struct as *as;
1192 1192 domid_t domid;
1193 1193 caddr_t uaddr;
1194 1194 uint_t flags;
1195 1195 int i;
1196 1196 int e;
1197 1197
1198 1198
1199 1199 domid = xvdi_get_oeid(state->bt_dip);
1200 1200
1201 1201 as = state->bt_map.um_as;
1202 1202 if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
1203 1203 return (DDI_FAILURE);
1204 1204 }
1205 1205
1206 1206 /* has to happen after segmap returns */
1207 1207 if (!state->bt_map.um_registered) {
1208 1208 /* register the pte's with segmf */
1209 1209 e = xpvtap_segmf_register(state);
1210 1210 if (e != DDI_SUCCESS) {
1211 1211 return (DDI_FAILURE);
1212 1212 }
1213 1213 }
1214 1214
1215 1215 /* alloc an ID for the user ring */
1216 1216 e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
1217 1217 if (e != DDI_SUCCESS) {
1218 1218 return (DDI_FAILURE);
1219 1219 }
1220 1220
↓ open down ↓ |
353 lines elided |
↑ open up ↑ |
1221 1221 /* if we don't have any segments to map, we're done */
1222 1222 if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
1223 1223 (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
1224 1224 (req->nr_segments == 0)) {
1225 1225 return (DDI_SUCCESS);
1226 1226 }
1227 1227
1228 1228 /* get the apps gref address */
1229 1229 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
1230 1230
1231 - AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1231 + AS_LOCK_ENTER(as, RW_READER);
1232 1232 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1233 1233 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1234 1234 (seg->s_base + seg->s_size))) {
1235 - AS_LOCK_EXIT(as, &as->a_lock);
1235 + AS_LOCK_EXIT(as);
1236 1236 return (DDI_FAILURE);
1237 1237 }
1238 1238
1239 1239 /* if we are reading from disk, we are writing into memory */
1240 1240 flags = 0;
1241 1241 if (req->operation == BLKIF_OP_READ) {
1242 1242 flags |= SEGMF_GREF_WR;
1243 1243 }
1244 1244
1245 1245 /* Load the grefs into seg_mf */
1246 1246 for (i = 0; i < req->nr_segments; i++) {
1247 1247 gref[i] = req->seg[i].gref;
1248 1248 }
1249 1249 (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
1250 1250 domid);
1251 1251
1252 - AS_LOCK_EXIT(as, &as->a_lock);
1252 + AS_LOCK_EXIT(as);
1253 1253
1254 1254 return (DDI_SUCCESS);
1255 1255 }
1256 1256
1257 1257
1258 1258 /*
1259 1259 * xpvtap_user_request_push()
1260 1260 */
1261 1261 static int
1262 1262 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
1263 1263 uint_t uid)
1264 1264 {
1265 1265 blkif_request_t *outstanding_req;
1266 1266 blkif_front_ring_t *uring;
1267 1267 blkif_request_t *target;
1268 1268 xpvtap_user_map_t *map;
1269 1269
1270 1270
1271 1271 uring = &state->bt_user_ring.ur_ring;
1272 1272 map = &state->bt_map;
1273 1273
1274 1274 target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
1275 1275
1276 1276 /*
1277 1277 * Save request from the frontend. used for ID mapping and unmap
1278 1278 * on response/cleanup
1279 1279 */
1280 1280 outstanding_req = &map->um_outstanding_reqs[uid];
1281 1281 bcopy(req, outstanding_req, sizeof (*outstanding_req));
1282 1282
1283 1283 /* put the request on the user ring */
1284 1284 bcopy(req, target, sizeof (*req));
1285 1285 target->id = (uint64_t)uid;
1286 1286 uring->req_prod_pvt++;
1287 1287
1288 1288 pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
1289 1289
1290 1290 return (DDI_SUCCESS);
1291 1291 }
1292 1292
1293 1293
/*
 * xpvtap_user_request_unmap()
 *    Undo xpvtap_user_request_map() for the request identified by uid:
 *    release the grefs mapped into the user app's address space (if any
 *    were mapped) and free the user ring ID.
 */
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	/* nothing to unmap if the app never mmap()ed the gref window */
	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			/* segment gone or range invalid; still free the ID */
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}
1337 1337
1338 1338
1339 1339 static int
1340 1340 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
1341 1341 uint_t *uid)
1342 1342 {
1343 1343 blkif_front_ring_t *uring;
1344 1344 blkif_response_t *target;
1345 1345
1346 1346
1347 1347 uring = &state->bt_user_ring.ur_ring;
1348 1348
1349 1349 if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
1350 1350 return (B_FALSE);
1351 1351 }
1352 1352
1353 1353 target = NULL;
1354 1354 target = RING_GET_RESPONSE(uring, uring->rsp_cons);
1355 1355 if (target == NULL) {
1356 1356 return (B_FALSE);
1357 1357 }
1358 1358
1359 1359 /* copy out the user app response */
1360 1360 bcopy(target, resp, sizeof (*resp));
1361 1361 uring->rsp_cons++;
1362 1362
1363 1363 /* restore the quests id from the original request */
1364 1364 *uid = (uint_t)resp->id;
1365 1365 resp->id = state->bt_map.um_outstanding_reqs[*uid].id;
1366 1366
1367 1367 return (B_TRUE);
1368 1368 }
1369 1369
1370 1370
1371 1371 /*
1372 1372 * xpvtap_user_app_stop()
1373 1373 */
1374 1374 static void xpvtap_user_app_stop(caddr_t arg)
1375 1375 {
1376 1376 xpvtap_state_t *state;
1377 1377 clock_t rc;
1378 1378
1379 1379 state = (xpvtap_state_t *)arg;
1380 1380
1381 1381 /*
1382 1382 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
1383 1383 * problem, we just won't auto-detach the driver.
1384 1384 */
1385 1385 mutex_enter(&state->bt_open.bo_mutex);
1386 1386 if (state->bt_open.bo_opened) {
1387 1387 rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
1388 1388 &state->bt_open.bo_mutex, drv_usectohz(10000000),
1389 1389 TR_CLOCK_TICK);
1390 1390 if (rc <= 0) {
1391 1391 cmn_err(CE_NOTE, "!user process still has driver open, "
1392 1392 "deferring detach\n");
1393 1393 }
1394 1394 }
1395 1395 mutex_exit(&state->bt_open.bo_mutex);
1396 1396 }
1397 1397
1398 1398
1399 1399 /*
1400 1400 * xpvtap_rs_init()
1401 1401 * Initialize the resource structure. init() returns a handle to be used
1402 1402 * for the rest of the resource functions. This code is written assuming
1403 1403 * that min_val will be close to 0. Therefore, we will allocate the free
1404 1404 * buffer only taking max_val into account.
1405 1405 */
1406 1406 static void
1407 1407 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
1408 1408 {
1409 1409 xpvtap_rs_t *rstruct;
1410 1410 uint_t array_size;
1411 1411 uint_t index;
1412 1412
1413 1413
1414 1414 ASSERT(handle != NULL);
1415 1415 ASSERT(min_val < max_val);
1416 1416
1417 1417 /* alloc space for resource structure */
1418 1418 rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);
1419 1419
1420 1420 /*
1421 1421 * Test to see if the max value is 64-bit aligned. If so, we don't need
1422 1422 * to allocate an extra 64-bit word. alloc space for free buffer
1423 1423 * (8 bytes per uint64_t).
1424 1424 */
1425 1425 if ((max_val & 0x3F) == 0) {
1426 1426 rstruct->rs_free_size = (max_val >> 6) * 8;
1427 1427 } else {
1428 1428 rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
1429 1429 }
1430 1430 rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
1431 1431
1432 1432 /* Initialize resource structure */
1433 1433 rstruct->rs_min = min_val;
1434 1434 rstruct->rs_last = min_val;
1435 1435 rstruct->rs_max = max_val;
1436 1436 mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
1437 1437 rstruct->rs_flushing = B_FALSE;
1438 1438
1439 1439 /* Mark all resources as free */
1440 1440 array_size = rstruct->rs_free_size >> 3;
1441 1441 for (index = 0; index < array_size; index++) {
1442 1442 rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
1443 1443 }
1444 1444
1445 1445 /* setup handle which is returned from this function */
1446 1446 *handle = rstruct;
1447 1447 }
1448 1448
1449 1449
1450 1450 /*
1451 1451 * xpvtap_rs_fini()
1452 1452 * Frees up the space allocated in init(). Notice that a pointer to the
1453 1453 * handle is used for the parameter. fini() will set the handle to NULL
1454 1454 * before returning.
1455 1455 */
1456 1456 static void
1457 1457 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
1458 1458 {
1459 1459 xpvtap_rs_t *rstruct;
1460 1460
1461 1461
1462 1462 ASSERT(handle != NULL);
1463 1463
1464 1464 rstruct = (xpvtap_rs_t *)*handle;
1465 1465
1466 1466 mutex_destroy(&rstruct->rs_mutex);
1467 1467 kmem_free(rstruct->rs_free, rstruct->rs_free_size);
1468 1468 kmem_free(rstruct, sizeof (xpvtap_rs_t));
1469 1469
1470 1470 /* set handle to null. This helps catch bugs. */
1471 1471 *handle = NULL;
1472 1472 }
1473 1473
1474 1474
/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 *    Returns DDI_SUCCESS with *resource set, or DDI_FAILURE when every
 *    resource in [rs_min, rs_max] is in use.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it finds
	 * a free resource. There are a total of 'max'-'min'+1 resources.
	 * Performs a round robin allocation.
	 */
	for (index = min; index <= max; index++) {

		/*
		 * rs_last is the next candidate; split it into its 64-bit
		 * word index and bit offset within that word.
		 */
		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the rollover
			 * at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, lets go to the next one. Note the
		 * rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}
1549 1549
1550 1550
/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource. Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	/*
	 * xpvtap_rs_flush() invokes its cleanup callback (which may end up
	 * here) while already holding rs_mutex; rs_flushing tells us not to
	 * take the non-recursive mutex a second time in that case.
	 */
	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}
1583 1583
1584 1584
/*
 * xpvtap_rs_flush()
 *    Walk every resource in [rs_min, rs_max]; for each one currently
 *    allocated, invoke 'callback(arg, resource)' to clean it up, then
 *    return it to the free list.  rs_flushing is set so a callback that
 *    calls xpvtap_rs_free() does not re-enter rs_mutex (held for the
 *    whole walk).
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.  Starts at rs_last and wraps, covering all max-min+1 values.
	 */
	for (index = min; index <= max; index++) {

		/* word index and bit offset of the current candidate */
		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);
}
↓ open down ↓ |
297 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX