Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/rsm/rsm.c
+++ new/usr/src/uts/common/io/rsm/rsm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2012 Milan Jurik. All rights reserved.
25 25 */
26 26
27 27
28 28 /*
29 29 * Overview of the RSM Kernel Agent:
30 30 * ---------------------------------
31 31 *
32 32 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
33 33 * kernel agent is a pseudo device driver which makes use of the RSMPI
34 34 * interface on behalf of the RSMAPI user library.
35 35 *
36 36 * The kernel agent functionality can be categorized into the following
37 37 * components:
38 38 * 1. Driver Infrastructure
39 39 * 2. Export/Import Segment Management
40 40 * 3. Internal resource allocation/deallocation
41 41 *
42 42 * The driver infrastructure includes the basic module loading entry points
43 43 * like _init, _info, _fini to load, unload and report information about
44 44 * the driver module. The driver infrastructure also includes the
45 45 * autoconfiguration entry points namely, attach, detach and getinfo for
46 46 * the device autoconfiguration.
47 47 *
48 48 * The kernel agent is a pseudo character device driver and exports
49 49 * a cb_ops structure which defines the driver entry points for character
50 50 * device access. This includes the open and close entry points. The
51 51 * other entry points provided include ioctl, devmap and segmap and chpoll.
52 52 * read and write entry points are not used since the device is memory
53 53 * mapped. Also ddi_prop_op is used for the prop_op entry point.
54 54 *
55 55 * The ioctl entry point supports a number of commands, which are used by
56 56 * the RSMAPI library in order to export and import segments. These
57 57 * commands include commands for binding and rebinding the physical pages
58 58 * allocated to the virtual address range, publishing the export segment,
59 59 * unpublishing and republishing an export segment, creating an
60 60 * import segment and a virtual connection from this import segment to
61 61 * an export segment, performing scatter-gather data transfer, barrier
62 62 * operations.
63 63 *
64 64 *
65 65 * Export and Import segments:
66 66 * ---------------------------
67 67 *
68 68 * In order to create an RSM export segment a process allocates a range in its
69 69 * virtual address space for the segment using standard Solaris interfaces.
70 70 * The process then calls RSMAPI, which in turn makes an ioctl call to the
71 71 * RSM kernel agent for an allocation of physical memory pages and for
72 72 * creation of the export segment by binding these pages to the virtual
73 73 * address range. These pages are locked in memory so that remote accesses
74 74 * are always applied to the correct page. Then the RSM segment is published,
75 75 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
76 76 * is assigned to it.
77 77 *
78 78 * In order to import a published RSM segment, RSMAPI creates an import
79 79 * segment and forms a virtual connection across the interconnect to the
80 80 * export segment, via an ioctl into the kernel agent with the connect
81 81 * command. The import segment setup is completed by mapping the
82 82 * local device memory into the importers virtual address space. The
83 83 * mapping of the import segment is handled by the segmap/devmap
84 84 * infrastructure described as follows.
85 85 *
86 86 * Segmap and Devmap interfaces:
87 87 *
88 88 * The RSM kernel agent allows device memory to be directly accessed by user
89 89 * threads via memory mapping. In order to do so, the RSM kernel agent
90 90 * supports the devmap and segmap entry points.
91 91 *
92 92 * The segmap entry point(rsm_segmap) is responsible for setting up a memory
93 93 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
94 94 * responsible for exporting the device memory to the user applications.
95 95 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
96 96 * control is transfered to the devmap_setup call which calls rsm_devmap.
97 97 *
98 98 * rsm_devmap validates the user mapping to the device or kernel memory
99 99 * and passes the information to the system for setting up the mapping. The
100 100 * actual setting up of the mapping is done by devmap_devmem_setup(for
101 101 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
102 102 * registered for device context management via the devmap_devmem_setup
103 103 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
104 104 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
105 105 * is created, a mapping is freed, a mapping is accessed or an existing
106 106 * mapping is duplicated respectively. These callbacks allow the RSM kernel
107 107 * agent to maintain state information associated with the mappings.
108 108 * The state information is mainly in the form of a cookie list for the import
109 109 * segment for which mapping has been done.
110 110 *
111 111 * Forced disconnect of import segments:
112 112 *
113 113 * When an exported segment is unpublished, the exporter sends a forced
114 114 * disconnect message to all its importers. The importer segments are
115 115 * unloaded and disconnected. This involves unloading the original
116 116 * mappings and remapping to a preallocated kernel trash page. This is
117 117 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
118 118 * preallocated by the kernel agent during attach using ddi_umem_alloc with
119 119 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
120 120 * due to unloading of the original mappings.
121 121 *
122 122 * Additionally every segment has a mapping generation number associated
123 123 * with it. This is an entry in the barrier generation page, created
124 124 * during attach time. This mapping generation number for the import
125 125 * segments is incremented on a force disconnect to notify the application
126 126 * of the force disconnect. On this notification, the application needs
127 127 * to reconnect the segment to establish a new legitimate mapping.
128 128 *
129 129 *
130 130 * Locks used in the kernel agent:
131 131 * -------------------------------
132 132 *
133 133 * The kernel agent uses a variety of mutexes and condition variables for
134 134 * mutual exclusion of the shared data structures and for synchronization
135 135 * between the various threads. Some of the locks are described as follows.
136 136 *
137 137 * Each resource structure, which represents either an export/import segment
138 138 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
139 139 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
140 140 * rsmseglock_acquire and rsmseglock_release macros. An additional
141 141 * lock called the rsmsi_lock is used for the shared import data structure
142 142 * that is relevant for resources representing import segments. There is
143 143 * also a condition variable associated with the resource called s_cv. This
144 144 * is used to wait for events like the segment state change etc.
145 145 *
146 146 * The resource structures are allocated from a pool of resource structures,
147 147 * called rsm_resource. This pool is protected via a reader-writer lock,
148 148 * called rsmrc_lock.
149 149 *
150 150 * There are two separate hash tables, one for the export segments and
151 151 * one for the import segments. The export segments are inserted into the
152 152 * export segment hash table only after they have been published and the
153 153 * import segments are inserted in the import segments list only after they
154 154 * have successfully connected to an exported segment. These tables are
155 155 * protected via reader-writer locks.
156 156 *
157 157 * Debug Support in the kernel agent:
158 158 * ----------------------------------
159 159 *
160 160 * Debugging support in the kernel agent is provided by the following
161 161 * macros.
162 162 *
163 163 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
164 164 * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer
165 165 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
166 166 * on the definition of the category and level. All messages that belong to
167 167 * the specified category(rsmdbg_category) and are of an equal or greater
168 168 * severity than the specified level(rsmdbg_level) are logged. The message
169 169 * is a string which uses the same formatting rules as the strings used in
170 170 * printf.
171 171 *
172 172 * The category defines which component of the kernel agent has logged this
173 173 * message. There are a number of categories that have been defined such as
174 174 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
175 175 * DBG_ADDCATEGORY is used to add in another category to the currently
176 176 * specified category value so that the component using this new category
177 177 * can also effectively log debug messages. Thus, the category of a specific
178 178 * message is some combination of the available categories and we can define
179 179 * sub-categories if we want a finer level of granularity.
180 180 *
181 181 * The level defines the severity of the message. Different level values are
182 182 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
183 183 * the least severe(debug level is 0).
184 184 *
185 185 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
186 186 * variable or a string respectively.
187 187 *
188 188 *
189 189 * NOTES:
190 190 *
191 191 * Special Fork and Exec Handling:
192 192 * -------------------------------
193 193 *
194 194 * The backing physical pages of an exported segment are always locked down.
195 195 * Thus, there are two cases in which a process having exported segments
196 196 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
197 197 * forks and invokes exit before the duped file descriptors for the export
198 198 * segments are closed in the child process. The hang is caused because the
199 199 * address space release algorithm in Solaris VM subsystem is based on a
200 200 * non-blocking loop which does not terminate while segments are locked
201 201 * down. In addition to this, Solaris VM subsystem lacks a callback
202 202 * mechanism to the rsm kernel agent to allow unlocking these export
203 203 * segment pages.
204 204 *
205 205 * In order to circumvent this problem, the kernel agent does the following.
206 206 * The Solaris VM subsystem keeps memory segments in increasing order of
207 207 * virtual addressses. Thus a special page(special_exit_offset) is allocated
208 208 * by the kernel agent and is mmapped into the heap area of the process address
209 209 * space(the mmap is done by the RSMAPI library). During the mmap processing
210 210 * of this special page by the devmap infrastructure, a callback(the same
211 211 * devmap context management callbacks discussed above) is registered for an
212 212 * unmap.
213 213 *
214 214 * As discussed above, this page is processed by the Solaris address space
215 215 * release code before any of the exported segments pages(which are allocated
216 216 * from high memory). It is during this processing that the unmap callback gets
217 217 * called and this callback is responsible for force destroying the exported
218 218 * segments and thus eliminating the problem of locked pages.
219 219 *
220 220 * Flow-control:
221 221 * ------------
222 222 *
223 223 * A credit based flow control algorithm is used for messages whose
224 224 * processing cannot be done in the interrupt context because it might
225 225 * involve invoking rsmpi calls, or might take a long time to complete
226 226 * or might need to allocate resources. The algorithm operates on a per
227 227 * path basis. To send a message the pathend needs to have a credit and
228 228 * it consumes one for every message that is flow controlled. On the
229 229 * receiving pathend the message is put on a msgbuf_queue and a task is
230 230 * dispatched on the worker thread - recv_taskq where it is processed.
231 231 * After processing the message, the receiving pathend dequeues the message,
232 232 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
233 233 * credits to the sender pathend.
234 234 *
235 235 * RSM_DRTEST:
236 236 * -----------
237 237 *
238 238 * This is used to enable the DR testing using a test driver on test
239 239 * platforms which do not supported DR.
240 240 *
241 241 */
242 242
243 243 #include <sys/types.h>
244 244 #include <sys/param.h>
245 245 #include <sys/user.h>
246 246 #include <sys/buf.h>
247 247 #include <sys/systm.h>
248 248 #include <sys/cred.h>
249 249 #include <sys/vm.h>
250 250 #include <sys/uio.h>
251 251 #include <vm/seg.h>
252 252 #include <vm/page.h>
253 253 #include <sys/stat.h>
254 254
255 255 #include <sys/time.h>
256 256 #include <sys/errno.h>
257 257
258 258 #include <sys/file.h>
259 259 #include <sys/uio.h>
260 260 #include <sys/proc.h>
261 261 #include <sys/mman.h>
262 262 #include <sys/open.h>
263 263 #include <sys/atomic.h>
264 264 #include <sys/mem_config.h>
265 265
266 266
267 267 #include <sys/ddi.h>
268 268 #include <sys/devops.h>
269 269 #include <sys/ddidevmap.h>
270 270 #include <sys/sunddi.h>
271 271 #include <sys/esunddi.h>
272 272 #include <sys/ddi_impldefs.h>
273 273
274 274 #include <sys/kmem.h>
275 275 #include <sys/conf.h>
276 276 #include <sys/devops.h>
277 277 #include <sys/ddi_impldefs.h>
278 278
279 279 #include <sys/modctl.h>
280 280
281 281 #include <sys/policy.h>
282 282 #include <sys/types.h>
283 283 #include <sys/conf.h>
284 284 #include <sys/param.h>
285 285
286 286 #include <sys/taskq.h>
287 287
288 288 #include <sys/rsm/rsm_common.h>
289 289 #include <sys/rsm/rsmapi_common.h>
290 290 #include <sys/rsm/rsm.h>
291 291 #include <rsm_in.h>
292 292 #include <sys/rsm/rsmka_path_int.h>
293 293 #include <sys/rsm/rsmpi.h>
294 294
295 295 #include <sys/modctl.h>
296 296 #include <sys/debug.h>
297 297
298 298 #include <sys/tuneable.h>
299 299
300 300 #ifdef RSM_DRTEST
301 301 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
302 302 void *arg);
303 303 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
304 304 void *arg);
305 305 #endif
306 306
307 307 extern void dbg_printf(int category, int level, char *fmt, ...);
308 308 extern void rsmka_pathmanager_init();
309 309 extern void rsmka_pathmanager_cleanup();
310 310 extern void rele_sendq_token(sendq_token_t *);
311 311 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
312 312 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
313 313 extern int rsmka_topology_ioctl(caddr_t, int, int);
314 314
315 315 extern pri_t maxclsyspri;
316 316 extern work_queue_t work_queue;
317 317 extern kmutex_t ipc_info_lock;
318 318 extern kmutex_t ipc_info_cvlock;
319 319 extern kcondvar_t ipc_info_cv;
320 320 extern kmutex_t path_hold_cvlock;
321 321 extern kcondvar_t path_hold_cv;
322 322
323 323 extern kmutex_t rsmka_buf_lock;
324 324
325 325 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
326 326 extern adapter_t *rsmka_lookup_adapter(char *, int);
327 327 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
328 328 extern boolean_t rsmka_do_path_active(path_t *, int);
329 329 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
330 330 extern void rsmka_release_adapter(adapter_t *);
331 331 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
332 332 extern void rsmka_dequeue_msgbuf(path_t *path);
333 333 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
334 334 /* lint -w2 */
335 335
336 336 static int rsm_open(dev_t *, int, int, cred_t *);
337 337 static int rsm_close(dev_t, int, int, cred_t *);
338 338 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
339 339 cred_t *credp, int *rvalp);
340 340 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
341 341 uint_t);
342 342 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
343 343 uint_t, uint_t, cred_t *);
344 344 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
345 345 struct pollhead **phpp);
346 346
347 347 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
348 348 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
349 349 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
350 350
351 351 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
352 352 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
353 353 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
354 354 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
355 355 rsm_permission_t);
356 356 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
357 357 static void rsmacl_free(rsmapi_access_entry_t *, int);
358 358 static void rsmpiacl_free(rsm_access_entry_t *, int);
359 359
360 360 static int rsm_inc_pgcnt(pgcnt_t);
361 361 static void rsm_dec_pgcnt(pgcnt_t);
362 362 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
363 363 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
364 364 size_t *);
365 365 static void exporter_quiesce();
366 366 static void rsmseg_suspend(rsmseg_t *, int *);
367 367 static void rsmsegshare_suspend(rsmseg_t *);
368 368 static int rsmseg_resume(rsmseg_t *, void **);
369 369 static int rsmsegshare_resume(rsmseg_t *);
370 370
371 371 static struct cb_ops rsm_cb_ops = {
372 372 rsm_open, /* open */
373 373 rsm_close, /* close */
374 374 nodev, /* strategy */
375 375 nodev, /* print */
376 376 nodev, /* dump */
377 377 nodev, /* read */
378 378 nodev, /* write */
379 379 rsm_ioctl, /* ioctl */
380 380 rsm_devmap, /* devmap */
381 381 NULL, /* mmap */
382 382 rsm_segmap, /* segmap */
383 383 rsm_chpoll, /* poll */
384 384 ddi_prop_op, /* cb_prop_op */
385 385 0, /* streamtab */
386 386 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
387 387 0,
388 388 0,
389 389 0
390 390 };
391 391
392 392 static struct dev_ops rsm_ops = {
393 393 DEVO_REV, /* devo_rev, */
394 394 0, /* refcnt */
395 395 rsm_info, /* get_dev_info */
396 396 nulldev, /* identify */
397 397 nulldev, /* probe */
398 398 rsm_attach, /* attach */
399 399 rsm_detach, /* detach */
400 400 nodev, /* reset */
401 401 &rsm_cb_ops, /* driver operations */
402 402 (struct bus_ops *)0, /* bus operations */
403 403 0,
404 404 ddi_quiesce_not_needed, /* quiesce */
405 405 };
406 406
407 407 /*
408 408 * Module linkage information for the kernel.
409 409 */
410 410
411 411 static struct modldrv modldrv = {
412 412 &mod_driverops, /* Type of module. This one is a pseudo driver */
413 413 "Remote Shared Memory Driver",
414 414 &rsm_ops, /* driver ops */
415 415 };
416 416
417 417 static struct modlinkage modlinkage = {
418 418 MODREV_1,
419 419 (void *)&modldrv,
420 420 0,
421 421 0,
422 422 0
423 423 };
424 424
425 425 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
426 426 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
427 427 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
428 428
429 429 static kphysm_setup_vector_t rsm_dr_callback_vec = {
430 430 KPHYSM_SETUP_VECTOR_VERSION,
431 431 rsm_dr_callback_post_add,
432 432 rsm_dr_callback_pre_del,
433 433 rsm_dr_callback_post_del
434 434 };
435 435
436 436 /* This flag can be changed to 0 to help with PIT testing */
437 437 int rsmka_modunloadok = 1;
438 438 int no_reply_cnt = 0;
439 439
440 440 uint64_t rsm_ctrlmsg_errcnt = 0;
441 441 uint64_t rsm_ipcsend_errcnt = 0;
442 442
443 443 #define MAX_NODES 64
444 444
445 445 static struct rsm_driver_data rsm_drv_data;
446 446 static struct rsmresource_table rsm_resource;
447 447
448 448 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
449 449 static void rsmresource_destroy(void);
450 450 static int rsmresource_alloc(minor_t *);
451 451 static rsmresource_t *rsmresource_free(minor_t rnum);
452 452 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
453 453 static int rsm_unpublish(rsmseg_t *seg, int mode);
454 454 static int rsm_unbind(rsmseg_t *seg);
455 455 static uint_t rsmhash(rsm_memseg_id_t key);
456 456 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
457 457 static void rsmhash_free(rsmhash_table_t *rhash, int size);
458 458 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
459 459 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
460 460 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
461 461 void *cookie);
462 462 int rsm_disconnect(rsmseg_t *seg);
463 463 void rsmseg_unload(rsmseg_t *);
464 464 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
465 465
466 466 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
467 467 rsm_intr_q_op_t opcode, rsm_addr_t src,
468 468 void *data, size_t size, rsm_intr_hand_arg_t arg);
469 469
470 470 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
471 471
472 472 rsm_node_id_t my_nodeid;
473 473
474 474 /* cookie, va, offsets and length for the barrier */
475 475 static rsm_gnum_t *bar_va;
476 476 static ddi_umem_cookie_t bar_cookie;
477 477 static off_t barrier_offset;
478 478 static size_t barrier_size;
479 479 static int max_segs;
480 480
481 481 /* cookie for the trash memory */
482 482 static ddi_umem_cookie_t remap_cookie;
483 483
484 484 static rsm_memseg_id_t rsm_nextavail_segmentid;
485 485
486 486 extern taskq_t *work_taskq;
487 487 extern char *taskq_name;
488 488
489 489 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */
490 490
491 491 static rsmhash_table_t rsm_export_segs; /* list of exported segs */
492 492 rsmhash_table_t rsm_import_segs; /* list of imported segs */
493 493 static rsmhash_table_t rsm_event_queues; /* list of event queues */
494 494
495 495 static rsm_ipc_t rsm_ipc; /* ipc info */
496 496
497 497 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
498 498 static list_head_t rsm_suspend_list;
499 499
500 500 /* list of descriptors for remote importers */
501 501 static importers_table_t importer_list;
502 502
503 503 kmutex_t rsm_suspend_cvlock;
504 504 kcondvar_t rsm_suspend_cv;
505 505
506 506 static kmutex_t rsm_lock;
507 507
508 508 adapter_t loopback_adapter;
509 509 rsm_controller_attr_t loopback_attr;
510 510
511 511 int rsmipc_send_controlmsg(path_t *path, int msgtype);
512 512
513 513 void rsmka_init_loopback();
514 514
515 515 int rsmka_null_seg_create(
516 516 rsm_controller_handle_t,
517 517 rsm_memseg_export_handle_t *,
518 518 size_t,
519 519 uint_t,
520 520 rsm_memory_local_t *,
521 521 rsm_resource_callback_t,
522 522 rsm_resource_callback_arg_t);
523 523
524 524 int rsmka_null_seg_destroy(
525 525 rsm_memseg_export_handle_t);
526 526
527 527 int rsmka_null_bind(
528 528 rsm_memseg_export_handle_t,
529 529 off_t,
530 530 rsm_memory_local_t *,
531 531 rsm_resource_callback_t,
532 532 rsm_resource_callback_arg_t);
533 533
534 534 int rsmka_null_unbind(
535 535 rsm_memseg_export_handle_t,
536 536 off_t,
537 537 size_t);
538 538
539 539 int rsmka_null_rebind(
540 540 rsm_memseg_export_handle_t,
541 541 off_t,
542 542 rsm_memory_local_t *,
543 543 rsm_resource_callback_t,
544 544 rsm_resource_callback_arg_t);
545 545
546 546 int rsmka_null_publish(
547 547 rsm_memseg_export_handle_t,
548 548 rsm_access_entry_t [],
549 549 uint_t,
550 550 rsm_memseg_id_t,
551 551 rsm_resource_callback_t,
552 552 rsm_resource_callback_arg_t);
553 553
554 554
555 555 int rsmka_null_republish(
556 556 rsm_memseg_export_handle_t,
557 557 rsm_access_entry_t [],
558 558 uint_t,
559 559 rsm_resource_callback_t,
560 560 rsm_resource_callback_arg_t);
561 561
562 562 int rsmka_null_unpublish(
563 563 rsm_memseg_export_handle_t);
564 564
565 565 rsm_ops_t null_rsmpi_ops;
566 566
567 567 /*
568 568 * data and locks to keep track of total amount of exported memory
569 569 */
570 570 static pgcnt_t rsm_pgcnt;
571 571 static pgcnt_t rsm_pgcnt_max; /* max allowed */
572 572 static kmutex_t rsm_pgcnt_lock;
573 573
574 574 static int rsm_enable_dr;
575 575
576 576 static char loopback_str[] = "loopback";
577 577
578 578 int rsm_hash_size;
579 579
580 580 /*
581 581 * The locking model is as follows:
582 582 *
583 583 * Local operations:
584 584 * find resource - grab reader lock on resouce list
585 585 * insert rc - grab writer lock
586 586 * delete rc - grab writer lock and resource mutex
587 587 * read/write - no lock
588 588 *
589 589 * Remote invocations:
590 590 * find resource - grab read lock and resource mutex
591 591 *
592 592 * State:
593 593 * resource state - grab resource mutex
594 594 */
595 595
596 596 int
597 597 _init(void)
598 598 {
599 599 int e;
600 600
601 601 e = mod_install(&modlinkage);
602 602 if (e != 0) {
603 603 return (e);
604 604 }
605 605
606 606 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
607 607
608 608 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
609 609
610 610
611 611 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
612 612
613 613 rsm_hash_size = RSM_HASHSZ;
614 614
615 615 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
616 616
617 617 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
618 618
619 619 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
620 620
621 621 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
622 622 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
623 623
624 624 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
625 625 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
626 626
627 627 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
628 628 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
629 629
630 630 rsm_ipc.count = RSMIPC_SZ;
631 631 rsm_ipc.wanted = 0;
632 632 rsm_ipc.sequence = 0;
633 633
634 634 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
635 635
636 636 for (e = 0; e < RSMIPC_SZ; e++) {
637 637 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
638 638
639 639 RSMIPC_SET(slot, RSMIPC_FREE);
640 640 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
641 641 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
642 642 }
643 643
644 644 /*
645 645 * Initialize the suspend message list
646 646 */
647 647 rsm_suspend_list.list_head = NULL;
648 648 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
649 649
650 650 /*
651 651 * It is assumed here that configuration data is available
652 652 * during system boot since _init may be called at that time.
653 653 */
654 654
655 655 rsmka_pathmanager_init();
656 656
657 657 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
658 658 "rsm: _init done\n"));
659 659
660 660 return (DDI_SUCCESS);
661 661
662 662 }
663 663
664 664 int
665 665 _info(struct modinfo *modinfop)
666 666 {
667 667
668 668 return (mod_info(&modlinkage, modinfop));
669 669 }
670 670
671 671 int
672 672 _fini(void)
673 673 {
674 674 int e;
675 675
676 676 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
677 677 "rsm: _fini enter\n"));
678 678
679 679 /*
680 680 * The rsmka_modunloadok flag is simply used to help with
681 681 * the PIT testing. Make this flag 0 to disallow modunload.
682 682 */
683 683 if (rsmka_modunloadok == 0)
684 684 return (EBUSY);
685 685
686 686 /* rsm_detach will be called as a result of mod_remove */
687 687 e = mod_remove(&modlinkage);
688 688 if (e) {
689 689 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
690 690 "Unable to fini RSM %x\n", e));
691 691 return (e);
692 692 }
693 693
694 694 rsmka_pathmanager_cleanup();
695 695
696 696 rw_destroy(&rsm_resource.rsmrc_lock);
697 697
698 698 rw_destroy(&rsm_export_segs.rsmhash_rw);
699 699 rw_destroy(&rsm_import_segs.rsmhash_rw);
700 700 rw_destroy(&rsm_event_queues.rsmhash_rw);
701 701
702 702 mutex_destroy(&importer_list.lock);
703 703
704 704 mutex_destroy(&rsm_ipc.lock);
705 705 cv_destroy(&rsm_ipc.cv);
706 706
707 707 (void) mutex_destroy(&rsm_suspend_list.list_lock);
708 708
709 709 (void) mutex_destroy(&rsm_pgcnt_lock);
710 710
711 711 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
712 712
713 713 return (DDI_SUCCESS);
714 714
715 715 }
716 716
717 717 /*ARGSUSED1*/
718 718 static int
719 719 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
720 720 {
721 721 minor_t rnum;
722 722 int percent;
723 723 int ret;
724 724 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
725 725
726 726 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
727 727
728 728 switch (cmd) {
729 729 case DDI_ATTACH:
730 730 break;
731 731 case DDI_RESUME:
732 732 default:
733 733 DBG_PRINTF((category, RSM_ERR,
734 734 "rsm:rsm_attach - cmd not supported\n"));
735 735 return (DDI_FAILURE);
736 736 }
737 737
738 738 if (rsm_dip != NULL) {
739 739 DBG_PRINTF((category, RSM_ERR,
740 740 "rsm:rsm_attach - supports only "
741 741 "one instance\n"));
742 742 return (DDI_FAILURE);
743 743 }
744 744
745 745 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
746 746 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
747 747 "enable-dynamic-reconfiguration", 1);
748 748
749 749 mutex_enter(&rsm_drv_data.drv_lock);
750 750 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
751 751 mutex_exit(&rsm_drv_data.drv_lock);
752 752
753 753 if (rsm_enable_dr) {
754 754 #ifdef RSM_DRTEST
755 755 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
756 756 (void *)NULL);
757 757 #else
758 758 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
759 759 (void *)NULL);
760 760 #endif
761 761 if (ret != 0) {
762 762 mutex_exit(&rsm_drv_data.drv_lock);
763 763 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
764 764 "reconfiguration setup failed\n");
765 765 return (DDI_FAILURE);
766 766 }
767 767 }
768 768
769 769 mutex_enter(&rsm_drv_data.drv_lock);
770 770 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
771 771 rsm_drv_data.drv_state = RSM_DRV_OK;
772 772 cv_broadcast(&rsm_drv_data.drv_cv);
773 773 mutex_exit(&rsm_drv_data.drv_lock);
774 774
775 775 /*
776 776 * page_list_read_lock();
777 777 * xx_setup();
778 778 * page_list_read_unlock();
779 779 */
780 780
781 781 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
782 782 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
783 783 "segment-hashtable-size", RSM_HASHSZ);
784 784 if (rsm_hash_size == 0) {
785 785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
786 786 "rsm: segment-hashtable-size in rsm.conf "
787 787 "must be greater than 0, defaulting to 128\n"));
788 788 rsm_hash_size = RSM_HASHSZ;
789 789 }
790 790
791 791 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
792 792 rsm_hash_size));
793 793
794 794 rsm_pgcnt = 0;
795 795
796 796 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
797 797 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
798 798 "max-exported-memory", 0);
799 799 if (percent < 0) {
800 800 DBG_PRINTF((category, RSM_ERR,
801 801 "rsm:rsm_attach not enough memory available to "
802 802 "export, or max-exported-memory set incorrectly.\n"));
803 803 return (DDI_FAILURE);
804 804 }
805 805 /* 0 indicates no fixed upper limit. maxmem is the max */
806 806 /* available pageable physical mem */
807 807 rsm_pgcnt_max = (percent*maxmem)/100;
808 808
809 809 if (rsm_pgcnt_max > 0) {
810 810 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
811 811 "rsm: Available physical memory = %lu pages, "
812 812 "Max exportable memory = %lu pages",
813 813 maxmem, rsm_pgcnt_max));
814 814 }
815 815
816 816 /*
817 817 * Create minor number
818 818 */
819 819 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
820 820 DBG_PRINTF((category, RSM_ERR,
821 821 "rsm: rsm_attach - Unable to get "
822 822 "minor number\n"));
823 823 return (DDI_FAILURE);
824 824 }
825 825
826 826 ASSERT(rnum == RSM_DRIVER_MINOR);
827 827
828 828 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
829 829 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
830 830 DBG_PRINTF((category, RSM_ERR,
831 831 "rsm: rsm_attach - unable to allocate "
832 832 "minor #\n"));
833 833 return (DDI_FAILURE);
834 834 }
835 835
836 836 rsm_dip = devi;
837 837 /*
838 838 * Allocate the hashtables
839 839 */
840 840 rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
841 841 rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
842 842
843 843 importer_list.bucket = (importing_token_t **)
844 844 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
845 845
846 846 /*
847 847 * Allocate a resource struct
848 848 */
849 849 {
850 850 rsmresource_t *p;
851 851
852 852 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
853 853
854 854 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
855 855
856 856 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
857 857 }
858 858
859 859 /*
860 860 * Based on the rsm.conf property max-segments, determine the maximum
861 861 * number of segments that can be exported/imported. This is then used
862 862 * to determine the size for barrier failure pages.
863 863 */
864 864
865 865 /* First get the max number of segments from the rsm.conf file */
866 866 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
867 867 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
868 868 "max-segments", 0);
869 869 if (max_segs == 0) {
870 870 /* Use default number of segments */
871 871 max_segs = RSM_MAX_NUM_SEG;
872 872 }
873 873
874 874 /*
875 875 * Based on the max number of segments allowed, determine the barrier
876 876 * page size. add 1 to max_segs since the barrier page itself uses
877 877 * a slot
878 878 */
879 879 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
880 880 PAGESIZE);
881 881
882 882 /*
883 883 * allocation of the barrier failure page
884 884 */
885 885 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
886 886 DDI_UMEM_SLEEP, &bar_cookie);
887 887
888 888 /*
889 889 * Set the barrier_offset
890 890 */
891 891 barrier_offset = 0;
892 892
893 893 /*
894 894 * Allocate a trash memory and get a cookie for it. This will be used
895 895 * when remapping segments during force disconnects. Allocate the
896 896 * trash memory with a large size which is page aligned.
897 897 */
898 898 (void) ddi_umem_alloc((size_t)TRASHSIZE,
899 899 DDI_UMEM_TRASH, &remap_cookie);
900 900
901 901 /* initialize user segment id allocation variable */
902 902 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
903 903
904 904 /*
905 905 * initialize the null_rsmpi_ops vector and the loopback adapter
906 906 */
907 907 rsmka_init_loopback();
908 908
909 909
910 910 ddi_report_dev(devi);
911 911
912 912 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
913 913
914 914 return (DDI_SUCCESS);
915 915 }
916 916
917 917 /*
918 918 * The call to mod_remove in the _fine routine will cause the system
919 919 * to call rsm_detach
920 920 */
921 921 /*ARGSUSED*/
922 922 static int
923 923 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
924 924 {
925 925 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
926 926
927 927 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
928 928
929 929 switch (cmd) {
930 930 case DDI_DETACH:
931 931 break;
932 932 default:
933 933 DBG_PRINTF((category, RSM_ERR,
934 934 "rsm:rsm_detach - cmd %x not supported\n",
935 935 cmd));
936 936 return (DDI_FAILURE);
937 937 }
938 938
939 939 mutex_enter(&rsm_drv_data.drv_lock);
940 940 while (rsm_drv_data.drv_state != RSM_DRV_OK)
941 941 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
942 942 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
943 943 mutex_exit(&rsm_drv_data.drv_lock);
944 944
945 945 /*
946 946 * Unregister the DR callback functions
947 947 */
948 948 if (rsm_enable_dr) {
949 949 #ifdef RSM_DRTEST
950 950 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
951 951 (void *)NULL);
952 952 #else
953 953 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
954 954 (void *)NULL);
955 955 #endif
956 956 }
957 957
958 958 mutex_enter(&rsm_drv_data.drv_lock);
959 959 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
960 960 rsm_drv_data.drv_state = RSM_DRV_NEW;
961 961 mutex_exit(&rsm_drv_data.drv_lock);
962 962
963 963 ASSERT(rsm_suspend_list.list_head == NULL);
964 964
965 965 /*
966 966 * Release all resources, seglist, controller, ...
967 967 */
968 968
969 969 /* remove intersend queues */
970 970 /* remove registered services */
971 971
972 972
973 973 ddi_remove_minor_node(dip, DRIVER_NAME);
974 974 rsm_dip = NULL;
975 975
976 976 /*
977 977 * Free minor zero resource
978 978 */
979 979 {
980 980 rsmresource_t *p;
981 981
982 982 p = rsmresource_free(RSM_DRIVER_MINOR);
983 983 if (p) {
984 984 mutex_destroy(&p->rsmrc_lock);
985 985 kmem_free((void *)p, sizeof (*p));
986 986 }
987 987 }
988 988
989 989 /*
990 990 * Free resource table
991 991 */
992 992
993 993 rsmresource_destroy();
994 994
995 995 /*
996 996 * Free the hash tables
997 997 */
998 998 rsmhash_free(&rsm_export_segs, rsm_hash_size);
999 999 rsmhash_free(&rsm_import_segs, rsm_hash_size);
1000 1000
1001 1001 kmem_free((void *)importer_list.bucket,
1002 1002 rsm_hash_size * sizeof (importing_token_t *));
1003 1003 importer_list.bucket = NULL;
1004 1004
1005 1005
1006 1006 /* free barrier page */
1007 1007 if (bar_cookie != NULL) {
1008 1008 ddi_umem_free(bar_cookie);
1009 1009 }
1010 1010 bar_va = NULL;
1011 1011 bar_cookie = NULL;
1012 1012
1013 1013 /*
1014 1014 * Free the memory allocated for the trash
1015 1015 */
1016 1016 if (remap_cookie != NULL) {
1017 1017 ddi_umem_free(remap_cookie);
1018 1018 }
1019 1019 remap_cookie = NULL;
1020 1020
1021 1021 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1022 1022
1023 1023 return (DDI_SUCCESS);
1024 1024 }
1025 1025
1026 1026 /*ARGSUSED*/
1027 1027 static int
1028 1028 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1029 1029 {
1030 1030 register int error;
1031 1031 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1032 1032
1033 1033 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1034 1034
1035 1035 switch (infocmd) {
1036 1036 case DDI_INFO_DEVT2DEVINFO:
1037 1037 if (rsm_dip == NULL)
1038 1038 error = DDI_FAILURE;
1039 1039 else {
1040 1040 *result = (void *)rsm_dip;
1041 1041 error = DDI_SUCCESS;
1042 1042 }
1043 1043 break;
1044 1044 case DDI_INFO_DEVT2INSTANCE:
1045 1045 *result = (void *)0;
1046 1046 error = DDI_SUCCESS;
1047 1047 break;
1048 1048 default:
1049 1049 error = DDI_FAILURE;
1050 1050 }
1051 1051
1052 1052 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1053 1053 return (error);
1054 1054 }
1055 1055
1056 1056 adapter_t *
1057 1057 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1058 1058 {
1059 1059 adapter_t *adapter;
1060 1060 char adapter_devname[MAXNAMELEN];
1061 1061 int instance;
1062 1062 DBG_DEFINE(category,
1063 1063 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1064 1064
1065 1065 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1066 1066
1067 1067 instance = msg->cnum;
1068 1068
1069 1069 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1070 1070 return (NULL);
1071 1071 }
1072 1072
1073 1073 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1074 1074 return (NULL);
1075 1075
1076 1076 if (strcmp(adapter_devname, "loopback") == 0)
1077 1077 return (&loopback_adapter);
1078 1078
1079 1079 adapter = rsmka_lookup_adapter(adapter_devname, instance);
1080 1080
1081 1081 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1082 1082
1083 1083 return (adapter);
1084 1084 }
1085 1085
1086 1086
1087 1087 /*
1088 1088 * *********************** Resource Number Management ********************
1089 1089 * All resources are stored in a simple hash table. The table is an array
1090 1090 * of pointers to resource blks. Each blk contains:
1091 1091 * base - base number of this blk
1092 1092 * used - number of used slots in this blk.
1093 1093 * blks - array of pointers to resource items.
1094 1094 * An entry in a resource blk is empty if it's NULL.
1095 1095 *
1096 1096 * We start with no resource array. Each time we run out of slots, we
1097 1097 * reallocate a new larger array and copy the pointer to the new array and
1098 1098 * a new resource blk is allocated and added to the hash table.
1099 1099 *
1100 1100 * The resource control block contains:
1101 1101 * root - array of pointer of resource blks
1102 1102 * sz - current size of array.
1103 1103 * len - last valid entry in array.
1104 1104 *
1105 1105 * A search operation based on a resource number is as follows:
1106 1106 * index = rnum / RESOURCE_BLKSZ;
1107 1107 * ASSERT(index < resource_block.len);
1108 1108 * ASSERT(index < resource_block.sz);
1109 1109 * offset = rnum % RESOURCE_BLKSZ;
1110 1110 * ASSERT(offset >= resource_block.root[index]->base);
1111 1111 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1112 1112 * return resource_block.root[index]->blks[offset];
1113 1113 *
1114 1114 * A resource blk is freed with its used count reachs zero.
1115 1115 */
1116 1116 static int
1117 1117 rsmresource_alloc(minor_t *rnum)
1118 1118 {
1119 1119
1120 1120 /* search for available resource slot */
1121 1121 int i, j, empty = -1;
1122 1122 rsmresource_blk_t *blk;
1123 1123
1124 1124 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1125 1125 "rsmresource_alloc enter\n"));
1126 1126
1127 1127 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1128 1128
1129 1129 /* Try to find an empty slot */
1130 1130 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1131 1131 blk = rsm_resource.rsmrc_root[i];
1132 1132 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1133 1133 /* found an empty slot in this blk */
1134 1134 for (j = 0; j < RSMRC_BLKSZ; j++) {
1135 1135 if (blk->rsmrcblk_blks[j] == NULL) {
1136 1136 *rnum = (minor_t)
1137 1137 (j + (i * RSMRC_BLKSZ));
1138 1138 /*
1139 1139 * obey gen page limits
1140 1140 */
1141 1141 if (*rnum >= max_segs + 1) {
1142 1142 if (empty < 0) {
1143 1143 rw_exit(&rsm_resource.
1144 1144 rsmrc_lock);
1145 1145 DBG_PRINTF((
1146 1146 RSM_KERNEL_ALL,
1147 1147 RSM_ERR,
1148 1148 "rsmresource"
1149 1149 "_alloc failed:"
1150 1150 "not enough res"
1151 1151 "%d\n", *rnum));
1152 1152 return (RSMERR_INSUFFICIENT_RESOURCES);
1153 1153 } else {
1154 1154 /* use empty slot */
1155 1155 break;
1156 1156 }
1157 1157
1158 1158 }
1159 1159
1160 1160 blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1161 1161 blk->rsmrcblk_avail--;
1162 1162 rw_exit(&rsm_resource.rsmrc_lock);
1163 1163 DBG_PRINTF((RSM_KERNEL_ALL,
1164 1164 RSM_DEBUG_VERBOSE,
1165 1165 "rsmresource_alloc done\n"));
1166 1166 return (RSM_SUCCESS);
1167 1167 }
1168 1168 }
1169 1169 } else if (blk == NULL && empty < 0) {
1170 1170 /* remember first empty slot */
1171 1171 empty = i;
1172 1172 }
1173 1173 }
1174 1174
1175 1175 /* Couldn't find anything, allocate a new blk */
1176 1176 /*
1177 1177 * Do we need to reallocate the root array
1178 1178 */
1179 1179 if (empty < 0) {
1180 1180 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1181 1181 /*
1182 1182 * Allocate new array and copy current stuff into it
1183 1183 */
1184 1184 rsmresource_blk_t **p;
1185 1185 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1186 1186 RSMRC_BLKSZ;
1187 1187 /*
1188 1188 * Don't allocate more that max valid rnum
1189 1189 */
1190 1190 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1191 1191 max_segs + 1) {
1192 1192 rw_exit(&rsm_resource.rsmrc_lock);
1193 1193 return (RSMERR_INSUFFICIENT_RESOURCES);
1194 1194 }
1195 1195
1196 1196 p = (rsmresource_blk_t **)kmem_zalloc(
1197 1197 newsz * sizeof (*p),
1198 1198 KM_SLEEP);
1199 1199
1200 1200 if (rsm_resource.rsmrc_root) {
1201 1201 uint_t oldsz;
1202 1202
1203 1203 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1204 1204 (int)sizeof (*p));
1205 1205
1206 1206 /*
1207 1207 * Copy old data into new space and
1208 1208 * free old stuff
1209 1209 */
1210 1210 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1211 1211 kmem_free(rsm_resource.rsmrc_root, oldsz);
1212 1212 }
1213 1213
1214 1214 rsm_resource.rsmrc_root = p;
1215 1215 rsm_resource.rsmrc_sz = (int)newsz;
1216 1216 }
1217 1217
1218 1218 empty = rsm_resource.rsmrc_len;
1219 1219 rsm_resource.rsmrc_len++;
1220 1220 }
1221 1221
1222 1222 /*
1223 1223 * Allocate a new blk
1224 1224 */
1225 1225 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1226 1226 ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1227 1227 rsm_resource.rsmrc_root[empty] = blk;
1228 1228 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1229 1229
1230 1230 /*
1231 1231 * Allocate slot
1232 1232 */
1233 1233
1234 1234 *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1235 1235
1236 1236 /*
1237 1237 * watch out not to exceed bounds of barrier page
1238 1238 */
1239 1239 if (*rnum >= max_segs + 1) {
1240 1240 rw_exit(&rsm_resource.rsmrc_lock);
1241 1241 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1242 1242 "rsmresource_alloc failed %d\n", *rnum));
1243 1243
1244 1244 return (RSMERR_INSUFFICIENT_RESOURCES);
1245 1245 }
1246 1246 blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1247 1247
1248 1248
1249 1249 rw_exit(&rsm_resource.rsmrc_lock);
1250 1250
1251 1251 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1252 1252 "rsmresource_alloc done\n"));
1253 1253
1254 1254 return (RSM_SUCCESS);
1255 1255 }
1256 1256
1257 1257 static rsmresource_t *
1258 1258 rsmresource_free(minor_t rnum)
1259 1259 {
1260 1260
1261 1261 /* search for available resource slot */
1262 1262 int i, j;
1263 1263 rsmresource_blk_t *blk;
1264 1264 rsmresource_t *p;
1265 1265
1266 1266 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1267 1267 "rsmresource_free enter\n"));
1268 1268
1269 1269 i = (int)(rnum / RSMRC_BLKSZ);
1270 1270 j = (int)(rnum % RSMRC_BLKSZ);
1271 1271
1272 1272 if (i >= rsm_resource.rsmrc_len) {
1273 1273 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1274 1274 "rsmresource_free done\n"));
1275 1275 return (NULL);
1276 1276 }
1277 1277
1278 1278 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1279 1279
1280 1280 ASSERT(rsm_resource.rsmrc_root);
1281 1281 ASSERT(i < rsm_resource.rsmrc_len);
1282 1282 ASSERT(i < rsm_resource.rsmrc_sz);
1283 1283 blk = rsm_resource.rsmrc_root[i];
1284 1284 if (blk == NULL) {
1285 1285 rw_exit(&rsm_resource.rsmrc_lock);
1286 1286 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1287 1287 "rsmresource_free done\n"));
1288 1288 return (NULL);
1289 1289 }
1290 1290
1291 1291 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1292 1292
1293 1293 p = blk->rsmrcblk_blks[j];
1294 1294 if (p == RSMRC_RESERVED) {
1295 1295 p = NULL;
1296 1296 }
1297 1297
1298 1298 blk->rsmrcblk_blks[j] = NULL;
1299 1299 blk->rsmrcblk_avail++;
1300 1300 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1301 1301 /* free this blk */
1302 1302 kmem_free(blk, sizeof (*blk));
1303 1303 rsm_resource.rsmrc_root[i] = NULL;
1304 1304 }
1305 1305
1306 1306 rw_exit(&rsm_resource.rsmrc_lock);
1307 1307
1308 1308 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1309 1309 "rsmresource_free done\n"));
1310 1310
1311 1311 return (p);
1312 1312 }
1313 1313
1314 1314 static rsmresource_t *
1315 1315 rsmresource_lookup(minor_t rnum, int lock)
1316 1316 {
1317 1317 int i, j;
1318 1318 rsmresource_blk_t *blk;
1319 1319 rsmresource_t *p;
1320 1320
1321 1321 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1322 1322 "rsmresource_lookup enter\n"));
1323 1323
1324 1324 /* Find resource and lock it in READER mode */
1325 1325 /* search for available resource slot */
1326 1326
1327 1327 i = (int)(rnum / RSMRC_BLKSZ);
1328 1328 j = (int)(rnum % RSMRC_BLKSZ);
1329 1329
1330 1330 if (i >= rsm_resource.rsmrc_len) {
1331 1331 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1332 1332 "rsmresource_lookup done\n"));
1333 1333 return (NULL);
1334 1334 }
1335 1335
1336 1336 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1337 1337
1338 1338 blk = rsm_resource.rsmrc_root[i];
1339 1339 if (blk != NULL) {
1340 1340 ASSERT(i < rsm_resource.rsmrc_len);
1341 1341 ASSERT(i < rsm_resource.rsmrc_sz);
1342 1342
1343 1343 p = blk->rsmrcblk_blks[j];
1344 1344 if (lock == RSM_LOCK) {
1345 1345 if (p != RSMRC_RESERVED) {
1346 1346 mutex_enter(&p->rsmrc_lock);
1347 1347 } else {
1348 1348 p = NULL;
1349 1349 }
1350 1350 }
1351 1351 } else {
1352 1352 p = NULL;
1353 1353 }
1354 1354 rw_exit(&rsm_resource.rsmrc_lock);
1355 1355
1356 1356 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1357 1357 "rsmresource_lookup done\n"));
1358 1358
1359 1359 return (p);
1360 1360 }
1361 1361
1362 1362 static void
1363 1363 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1364 1364 {
1365 1365 /* Find resource and lock it in READER mode */
1366 1366 /* Caller can upgrade if need be */
1367 1367 /* search for available resource slot */
1368 1368 int i, j;
1369 1369 rsmresource_blk_t *blk;
1370 1370
1371 1371 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1372 1372 "rsmresource_insert enter\n"));
1373 1373
1374 1374 i = (int)(rnum / RSMRC_BLKSZ);
1375 1375 j = (int)(rnum % RSMRC_BLKSZ);
1376 1376
1377 1377 p->rsmrc_type = type;
1378 1378 p->rsmrc_num = rnum;
1379 1379
1380 1380 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1381 1381
1382 1382 ASSERT(rsm_resource.rsmrc_root);
1383 1383 ASSERT(i < rsm_resource.rsmrc_len);
1384 1384 ASSERT(i < rsm_resource.rsmrc_sz);
1385 1385
1386 1386 blk = rsm_resource.rsmrc_root[i];
1387 1387 ASSERT(blk);
1388 1388
1389 1389 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1390 1390
1391 1391 blk->rsmrcblk_blks[j] = p;
1392 1392
1393 1393 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1394 1394 "rsmresource_insert done\n"));
1395 1395
1396 1396 rw_exit(&rsm_resource.rsmrc_lock);
1397 1397 }
1398 1398
1399 1399 static void
1400 1400 rsmresource_destroy()
1401 1401 {
1402 1402 int i, j;
1403 1403
1404 1404 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1405 1405 "rsmresource_destroy enter\n"));
1406 1406
1407 1407 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1408 1408
1409 1409 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1410 1410 rsmresource_blk_t *blk;
1411 1411
1412 1412 blk = rsm_resource.rsmrc_root[i];
1413 1413 if (blk == NULL) {
1414 1414 continue;
1415 1415 }
1416 1416 for (j = 0; j < RSMRC_BLKSZ; j++) {
1417 1417 if (blk->rsmrcblk_blks[j] != NULL) {
1418 1418 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1419 1419 "Not null slot %d, %lx\n", j,
1420 1420 (size_t)blk->rsmrcblk_blks[j]));
1421 1421 }
1422 1422 }
1423 1423 kmem_free(blk, sizeof (*blk));
1424 1424 rsm_resource.rsmrc_root[i] = NULL;
1425 1425 }
1426 1426 if (rsm_resource.rsmrc_root) {
1427 1427 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1428 1428 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1429 1429 rsm_resource.rsmrc_root = NULL;
1430 1430 rsm_resource.rsmrc_len = 0;
1431 1431 rsm_resource.rsmrc_sz = 0;
1432 1432 }
1433 1433
1434 1434 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1435 1435 "rsmresource_destroy done\n"));
1436 1436
1437 1437 rw_exit(&rsm_resource.rsmrc_lock);
1438 1438 }
1439 1439
1440 1440
1441 1441 /* ******************** Generic Key Hash Table Management ********* */
1442 1442 static rsmresource_t *
1443 1443 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1444 1444 rsm_resource_state_t state)
1445 1445 {
1446 1446 rsmresource_t *p;
1447 1447 uint_t hashval;
1448 1448 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1449 1449
1450 1450 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1451 1451
1452 1452 hashval = rsmhash(key);
1453 1453
1454 1454 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1455 1455 key, hashval));
1456 1456
1457 1457 rw_enter(&rhash->rsmhash_rw, RW_READER);
1458 1458
1459 1459 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1460 1460
1461 1461 for (; p; p = p->rsmrc_next) {
1462 1462 if (p->rsmrc_key == key) {
1463 1463 /* acquire resource lock */
1464 1464 RSMRC_LOCK(p);
1465 1465 break;
1466 1466 }
1467 1467 }
1468 1468
1469 1469 rw_exit(&rhash->rsmhash_rw);
1470 1470
1471 1471 if (p != NULL && p->rsmrc_state != state) {
1472 1472 /* state changed, release lock and return null */
1473 1473 RSMRC_UNLOCK(p);
1474 1474 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1475 1475 "rsmhash_lookup done: state changed\n"));
1476 1476 return (NULL);
1477 1477 }
1478 1478
1479 1479 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1480 1480
1481 1481 return (p);
1482 1482 }
1483 1483
1484 1484 static void
1485 1485 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1486 1486 {
1487 1487 rsmresource_t *p, **back;
1488 1488 uint_t hashval;
1489 1489 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1490 1490
1491 1491 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1492 1492
1493 1493 hashval = rsmhash(rcelm->rsmrc_key);
1494 1494
1495 1495 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1496 1496 rcelm->rsmrc_key, hashval));
1497 1497
1498 1498 /*
1499 1499 * It's ok not to find the segment.
1500 1500 */
1501 1501 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1502 1502
1503 1503 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1504 1504
1505 1505 for (; (p = *back) != NULL; back = &p->rsmrc_next) {
1506 1506 if (p == rcelm) {
1507 1507 *back = rcelm->rsmrc_next;
1508 1508 break;
1509 1509 }
1510 1510 }
1511 1511
1512 1512 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1513 1513
1514 1514 rw_exit(&rhash->rsmhash_rw);
1515 1515 }
1516 1516
1517 1517 static int
1518 1518 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1519 1519 int dup_check, rsm_resource_state_t state)
1520 1520 {
1521 1521 rsmresource_t *p = NULL, **bktp;
1522 1522 uint_t hashval;
1523 1523 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1524 1524
1525 1525 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1526 1526
1527 1527 /* lock table */
1528 1528 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1529 1529
1530 1530 /*
1531 1531 * If the current resource state is other than the state passed in
1532 1532 * then the resource is (probably) already on the list. eg. for an
1533 1533 * import segment if the state is not RSM_STATE_NEW then it's on the
1534 1534 * list already.
1535 1535 */
1536 1536 RSMRC_LOCK(new);
1537 1537 if (new->rsmrc_state != state) {
1538 1538 RSMRC_UNLOCK(new);
1539 1539 rw_exit(&rhash->rsmhash_rw);
1540 1540 return (RSMERR_BAD_SEG_HNDL);
1541 1541 }
1542 1542
1543 1543 hashval = rsmhash(key);
1544 1544 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1545 1545
1546 1546 if (dup_check) {
1547 1547 /*
1548 1548 * Used for checking export segments; don't want to have
1549 1549 * the same key used for multiple segments.
1550 1550 */
1551 1551
1552 1552 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1553 1553
1554 1554 for (; p; p = p->rsmrc_next) {
1555 1555 if (p->rsmrc_key == key) {
1556 1556 RSMRC_UNLOCK(new);
1557 1557 break;
1558 1558 }
1559 1559 }
1560 1560 }
1561 1561
1562 1562 if (p == NULL) {
1563 1563 /* Key doesn't exist, add it */
1564 1564
1565 1565 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1566 1566
1567 1567 new->rsmrc_key = key;
1568 1568 new->rsmrc_next = *bktp;
1569 1569 *bktp = new;
1570 1570 }
1571 1571
1572 1572 rw_exit(&rhash->rsmhash_rw);
1573 1573
1574 1574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1575 1575
1576 1576 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1577 1577 }
1578 1578
1579 1579 /*
1580 1580 * XOR each byte of the key.
1581 1581 */
1582 1582 static uint_t
1583 1583 rsmhash(rsm_memseg_id_t key)
1584 1584 {
1585 1585 uint_t hash = key;
1586 1586
1587 1587 hash ^= (key >> 8);
1588 1588 hash ^= (key >> 16);
1589 1589 hash ^= (key >> 24);
1590 1590
1591 1591 return (hash % rsm_hash_size);
1592 1592
1593 1593 }
1594 1594
1595 1595 /*
1596 1596 * generic function to get a specific bucket
1597 1597 */
1598 1598 static void *
1599 1599 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1600 1600 {
1601 1601
1602 1602 if (rhash->bucket == NULL)
1603 1603 return (NULL);
1604 1604 else
1605 1605 return ((void *)rhash->bucket[hashval]);
1606 1606 }
1607 1607
1608 1608 /*
1609 1609 * generic function to get a specific bucket's address
1610 1610 */
1611 1611 static void **
1612 1612 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1613 1613 {
1614 1614 if (rhash->bucket == NULL)
1615 1615 return (NULL);
1616 1616 else
1617 1617 return ((void **)&(rhash->bucket[hashval]));
1618 1618 }
1619 1619
1620 1620 /*
1621 1621 * generic function to alloc a hash table
1622 1622 */
1623 1623 static void
1624 1624 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1625 1625 {
1626 1626 rhash->bucket = (rsmresource_t **)
1627 1627 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1628 1628 }
1629 1629
1630 1630 /*
1631 1631 * generic function to free a hash table
1632 1632 */
1633 1633 static void
1634 1634 rsmhash_free(rsmhash_table_t *rhash, int size)
1635 1635 {
1636 1636
1637 1637 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1638 1638 rhash->bucket = NULL;
1639 1639
1640 1640 }
1641 1641 /* *********************** Exported Segment Key Management ************ */
1642 1642
1643 1643 #define rsmexport_add(new, key) \
1644 1644 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
1645 1645 RSM_STATE_BIND)
1646 1646
1647 1647 #define rsmexport_rm(arg) \
1648 1648 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))
1649 1649
1650 1650 #define rsmexport_lookup(key) \
1651 1651 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)
1652 1652
1653 1653 /* ************************** Import Segment List Management ********** */
1654 1654
1655 1655 /*
1656 1656 * Add segment to import list. This will be useful for paging and loopback
1657 1657 * segment unloading.
1658 1658 */
1659 1659 #define rsmimport_add(arg, key) \
1660 1660 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
1661 1661 RSM_STATE_NEW)
1662 1662
1663 1663 #define rsmimport_rm(arg) \
1664 1664 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1665 1665
1666 1666 /*
1667 1667 * #define rsmimport_lookup(key) \
1668 1668 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1669 1669 */
1670 1670
1671 1671 /*
1672 1672 * increase the ref count and make the import segment point to the
1673 1673 * shared data structure. Return a pointer to the share data struct
1674 1674 * and the shared data struct is locked upon return
1675 1675 */
1676 1676 static rsm_import_share_t *
1677 1677 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1678 1678 rsmseg_t *segp)
1679 1679 {
1680 1680 uint_t hash;
1681 1681 rsmresource_t *p;
1682 1682 rsm_import_share_t *shdatap;
1683 1683 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1684 1684
1685 1685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1686 1686
1687 1687 hash = rsmhash(key);
1688 1688 /* lock table */
1689 1689 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1690 1690 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1691 1691 key, hash));
1692 1692
1693 1693 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1694 1694
1695 1695 for (; p; p = p->rsmrc_next) {
1696 1696 /*
1697 1697 * Look for an entry that is importing the same exporter
1698 1698 * with the share data structure allocated.
1699 1699 */
1700 1700 if ((p->rsmrc_key == key) &&
1701 1701 (p->rsmrc_node == node) &&
1702 1702 (p->rsmrc_adapter == adapter) &&
1703 1703 (((rsmseg_t *)p)->s_share != NULL)) {
1704 1704 shdatap = ((rsmseg_t *)p)->s_share;
1705 1705 break;
1706 1706 }
1707 1707 }
1708 1708
1709 1709 if (p == NULL) {
1710 1710 /* we are the first importer, create the shared data struct */
1711 1711 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1712 1712 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1713 1713 shdatap->rsmsi_segid = key;
1714 1714 shdatap->rsmsi_node = node;
1715 1715 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1716 1716 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1717 1717 }
1718 1718
1719 1719 rsmseglock_acquire(segp);
1720 1720
1721 1721 /* we grab the shared lock before returning from this function */
1722 1722 mutex_enter(&shdatap->rsmsi_lock);
1723 1723
1724 1724 shdatap->rsmsi_refcnt++;
1725 1725 segp->s_share = shdatap;
1726 1726
1727 1727 rsmseglock_release(segp);
1728 1728
1729 1729 rw_exit(&rsm_import_segs.rsmhash_rw);
1730 1730
1731 1731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1732 1732
1733 1733 return (shdatap);
1734 1734 }
1735 1735
1736 1736 /*
1737 1737 * the shared data structure should be locked before calling
1738 1738 * rsmsharecv_signal().
1739 1739 * Change the state and signal any waiting segments.
1740 1740 */
1741 1741 void
1742 1742 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1743 1743 {
1744 1744 ASSERT(rsmsharelock_held(seg));
1745 1745
1746 1746 if (seg->s_share->rsmsi_state == oldstate) {
1747 1747 seg->s_share->rsmsi_state = newstate;
1748 1748 cv_broadcast(&seg->s_share->rsmsi_cv);
1749 1749 }
1750 1750 }
1751 1751
1752 1752 /*
1753 1753 * Add to the hash table
1754 1754 */
1755 1755 static void
1756 1756 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1757 1757 void *cookie)
1758 1758 {
1759 1759
1760 1760 importing_token_t *head;
1761 1761 importing_token_t *new_token;
1762 1762 int index;
1763 1763
1764 1764 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1765 1765
1766 1766 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1767 1767
1768 1768 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1769 1769 new_token->importing_node = node;
1770 1770 new_token->key = key;
1771 1771 new_token->import_segment_cookie = cookie;
1772 1772 new_token->importing_adapter_hwaddr = hwaddr;
1773 1773
1774 1774 index = rsmhash(key);
1775 1775
1776 1776 mutex_enter(&importer_list.lock);
1777 1777
1778 1778 head = importer_list.bucket[index];
1779 1779 importer_list.bucket[index] = new_token;
1780 1780 new_token->next = head;
1781 1781 mutex_exit(&importer_list.lock);
1782 1782
1783 1783 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1784 1784 }
1785 1785
1786 1786 static void
1787 1787 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
1788 1788 {
1789 1789
1790 1790 importing_token_t *prev, *token = NULL;
1791 1791 int index;
1792 1792 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1793 1793
1794 1794 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1795 1795
1796 1796 index = rsmhash(key);
1797 1797
1798 1798 mutex_enter(&importer_list.lock);
1799 1799
1800 1800 token = importer_list.bucket[index];
1801 1801
1802 1802 prev = token;
1803 1803 while (token != NULL) {
1804 1804 if (token->importing_node == node &&
1805 1805 token->import_segment_cookie == cookie) {
1806 1806 if (prev == token)
1807 1807 importer_list.bucket[index] = token->next;
1808 1808 else
1809 1809 prev->next = token->next;
1810 1810 kmem_free((void *)token, sizeof (*token));
1811 1811 break;
1812 1812 } else {
1813 1813 prev = token;
1814 1814 token = token->next;
1815 1815 }
1816 1816 }
1817 1817
1818 1818 mutex_exit(&importer_list.lock);
1819 1819
1820 1820 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1821 1821
1822 1822
1823 1823 }
1824 1824
1825 1825 /* **************************Segment Structure Management ************* */
1826 1826
1827 1827 /*
1828 1828 * Free segment structure
1829 1829 */
1830 1830 static void
1831 1831 rsmseg_free(rsmseg_t *seg)
1832 1832 {
1833 1833
1834 1834 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1835 1835
1836 1836 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1837 1837
1838 1838 /* need to take seglock here to avoid race with rsmmap_unmap() */
1839 1839 rsmseglock_acquire(seg);
1840 1840 if (seg->s_ckl != NULL) {
1841 1841 /* Segment is still busy */
1842 1842 seg->s_state = RSM_STATE_END;
1843 1843 rsmseglock_release(seg);
1844 1844 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1845 1845 "rsmseg_free done\n"));
1846 1846 return;
1847 1847 }
1848 1848
1849 1849 rsmseglock_release(seg);
1850 1850
1851 1851 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1852 1852
1853 1853 /*
1854 1854 * If it's an importer decrement the refcount
1855 1855 * and if its down to zero free the shared data structure.
1856 1856 * This is where failures during rsm_connect() are unrefcounted
1857 1857 */
1858 1858 if (seg->s_share != NULL) {
1859 1859
1860 1860 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1861 1861
1862 1862 rsmsharelock_acquire(seg);
1863 1863
1864 1864 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1865 1865
1866 1866 seg->s_share->rsmsi_refcnt--;
1867 1867
1868 1868 if (seg->s_share->rsmsi_refcnt == 0) {
1869 1869 rsmsharelock_release(seg);
1870 1870 mutex_destroy(&seg->s_share->rsmsi_lock);
1871 1871 cv_destroy(&seg->s_share->rsmsi_cv);
1872 1872 kmem_free((void *)(seg->s_share),
1873 1873 sizeof (rsm_import_share_t));
1874 1874 } else {
1875 1875 rsmsharelock_release(seg);
1876 1876 }
1877 1877 /*
1878 1878 * The following needs to be done after any
1879 1879 * rsmsharelock calls which use seg->s_share.
1880 1880 */
1881 1881 seg->s_share = NULL;
1882 1882 }
1883 1883
1884 1884 cv_destroy(&seg->s_cv);
1885 1885 mutex_destroy(&seg->s_lock);
1886 1886 rsmacl_free(seg->s_acl, seg->s_acl_len);
1887 1887 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1888 1888 if (seg->s_adapter)
1889 1889 rsmka_release_adapter(seg->s_adapter);
1890 1890
1891 1891 kmem_free((void *)seg, sizeof (*seg));
1892 1892
1893 1893 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1894 1894
1895 1895 }
1896 1896
1897 1897
1898 1898 static rsmseg_t *
1899 1899 rsmseg_alloc(minor_t num, struct cred *cred)
1900 1900 {
1901 1901 rsmseg_t *new;
1902 1902 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1903 1903
1904 1904 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1905 1905 /*
1906 1906 * allocate memory for new segment. This should be a segkmem cache.
1907 1907 */
1908 1908 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1909 1909
1910 1910 new->s_state = RSM_STATE_NEW;
1911 1911 new->s_minor = num;
1912 1912 new->s_acl_len = 0;
1913 1913 new->s_cookie = NULL;
1914 1914 new->s_adapter = NULL;
1915 1915
1916 1916 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1917 1917 /* we don't have a key yet, will set at export/connect */
1918 1918 new->s_uid = crgetuid(cred);
1919 1919 new->s_gid = crgetgid(cred);
1920 1920
1921 1921 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1922 1922 cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1923 1923
1924 1924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1925 1925
1926 1926 return (new);
1927 1927 }
1928 1928
1929 1929 /* ******************************** Driver Open/Close/Poll *************** */
1930 1930
1931 1931 /*ARGSUSED1*/
1932 1932 static int
1933 1933 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1934 1934 {
1935 1935 minor_t rnum;
1936 1936 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1937 1937
1938 1938 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1939 1939 /*
1940 1940 * Char only
1941 1941 */
1942 1942 if (otyp != OTYP_CHR) {
1943 1943 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1944 1944 return (EINVAL);
1945 1945 }
1946 1946
1947 1947 /*
1948 1948 * Only zero can be opened, clones are used for resources.
1949 1949 */
1950 1950 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1951 1951 DBG_PRINTF((category, RSM_ERR,
1952 1952 "rsm_open: bad minor %d\n", getminor(*devp)));
1953 1953 return (ENODEV);
1954 1954 }
1955 1955
1956 1956 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1957 1957 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1958 1958 return (EPERM);
1959 1959 }
1960 1960
1961 1961 if (!(flag & FWRITE)) {
1962 1962 /*
1963 1963 * The library function _rsm_librsm_init calls open for
1964 1964 * /dev/rsm with flag set to O_RDONLY. We want a valid
1965 1965 * file descriptor to be returned for minor device zero.
1966 1966 */
1967 1967
1968 1968 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1969 1969 "rsm_open RDONLY done\n"));
1970 1970 return (DDI_SUCCESS);
1971 1971 }
1972 1972
1973 1973 /*
1974 1974 * - allocate new minor number and segment.
1975 1975 * - add segment to list of all segments.
1976 1976 * - set minordev data to segment
1977 1977 * - update devp argument to new device
1978 1978 * - update s_cred to cred; make sure you do crhold(cred);
1979 1979 */
1980 1980
1981 1981 /* allocate a new resource number */
1982 1982 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1983 1983 /*
1984 1984 * We will bind this minor to a specific resource in first
1985 1985 * ioctl
1986 1986 */
1987 1987 *devp = makedevice(getmajor(*devp), rnum);
1988 1988 } else {
1989 1989 return (EAGAIN);
1990 1990 }
1991 1991
1992 1992 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1993 1993 return (DDI_SUCCESS);
1994 1994 }
1995 1995
1996 1996 static void
1997 1997 rsmseg_close(rsmseg_t *seg, int force_flag)
1998 1998 {
1999 1999 int e = RSM_SUCCESS;
2000 2000
2001 2001 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2002 2002
2003 2003 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2004 2004
2005 2005 rsmseglock_acquire(seg);
2006 2006 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2007 2007 RSM_RESOURCE_EXPORT_SEGMENT)) {
2008 2008 /*
2009 2009 * If we are processing rsm_close wait for force_destroy
2010 2010 * processing to complete since force_destroy processing
2011 2011 * needs to finish first before we can free the segment.
2012 2012 * force_destroy is only for export segments
2013 2013 */
2014 2014 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2015 2015 cv_wait(&seg->s_cv, &seg->s_lock);
2016 2016 }
2017 2017 }
2018 2018 rsmseglock_release(seg);
2019 2019
2020 2020 /* It's ok to read the state without a lock */
2021 2021 switch (seg->s_state) {
2022 2022 case RSM_STATE_EXPORT:
2023 2023 case RSM_STATE_EXPORT_QUIESCING:
2024 2024 case RSM_STATE_EXPORT_QUIESCED:
2025 2025 e = rsm_unpublish(seg, 1);
2026 2026 /* FALLTHRU */
2027 2027 case RSM_STATE_BIND_QUIESCED:
2028 2028 /* FALLTHRU */
2029 2029 case RSM_STATE_BIND:
2030 2030 e = rsm_unbind(seg);
2031 2031 if (e != RSM_SUCCESS && force_flag == 1)
2032 2032 return;
2033 2033 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2034 2034 /* FALLTHRU */
2035 2035 case RSM_STATE_NEW_QUIESCED:
2036 2036 rsmseglock_acquire(seg);
2037 2037 seg->s_state = RSM_STATE_NEW;
2038 2038 cv_broadcast(&seg->s_cv);
2039 2039 rsmseglock_release(seg);
2040 2040 break;
2041 2041 case RSM_STATE_NEW:
2042 2042 break;
2043 2043 case RSM_STATE_ZOMBIE:
2044 2044 /*
2045 2045 * Segments in this state have been removed off the
2046 2046 * exported segments list and have been unpublished
2047 2047 * and unbind. These segments have been removed during
2048 2048 * a callback to the rsm_export_force_destroy, which
2049 2049 * is called for the purpose of unlocking these
2050 2050 * exported memory segments when a process exits but
2051 2051 * leaves the segments locked down since rsm_close is
2052 2052 * is not called for the segments. This can happen
2053 2053 * when a process calls fork or exec and then exits.
2054 2054 * Once the segments are in the ZOMBIE state, all that
2055 2055 * remains is to destroy them when rsm_close is called.
2056 2056 * This is done here. Thus, for such segments the
2057 2057 * the state is changed to new so that later in this
2058 2058 * function rsmseg_free is called.
2059 2059 */
2060 2060 rsmseglock_acquire(seg);
2061 2061 seg->s_state = RSM_STATE_NEW;
2062 2062 rsmseglock_release(seg);
2063 2063 break;
2064 2064 case RSM_STATE_MAP_QUIESCE:
2065 2065 case RSM_STATE_ACTIVE:
2066 2066 /* Disconnect will handle the unmap */
2067 2067 case RSM_STATE_CONN_QUIESCE:
2068 2068 case RSM_STATE_CONNECT:
2069 2069 case RSM_STATE_DISCONNECT:
2070 2070 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2071 2071 (void) rsm_disconnect(seg);
2072 2072 break;
2073 2073 case RSM_STATE_MAPPING:
2074 2074 /*FALLTHRU*/
2075 2075 case RSM_STATE_END:
2076 2076 DBG_PRINTF((category, RSM_ERR,
2077 2077 "Invalid segment state %d in rsm_close\n", seg->s_state));
2078 2078 break;
2079 2079 default:
2080 2080 DBG_PRINTF((category, RSM_ERR,
2081 2081 "Invalid segment state %d in rsm_close\n", seg->s_state));
2082 2082 break;
2083 2083 }
2084 2084
2085 2085 /*
2086 2086 * check state.
2087 2087 * - make sure you do crfree(s_cred);
2088 2088 * release segment and minor number
2089 2089 */
2090 2090 ASSERT(seg->s_state == RSM_STATE_NEW);
2091 2091
2092 2092 /*
2093 2093 * The export_force_destroy callback is created to unlock
2094 2094 * the exported segments of a process
2095 2095 * when the process does a fork or exec and then exits calls this
2096 2096 * function with the force flag set to 1 which indicates that the
2097 2097 * segment state must be converted to ZOMBIE. This state means that the
2098 2098 * segments still exist and have been unlocked and most importantly the
2099 2099 * only operation allowed is to destroy them on an rsm_close.
2100 2100 */
2101 2101 if (force_flag) {
2102 2102 rsmseglock_acquire(seg);
2103 2103 seg->s_state = RSM_STATE_ZOMBIE;
2104 2104 rsmseglock_release(seg);
2105 2105 } else {
2106 2106 rsmseg_free(seg);
2107 2107 }
2108 2108
2109 2109 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2110 2110 }
2111 2111
2112 2112 static int
2113 2113 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2114 2114 {
2115 2115 minor_t rnum = getminor(dev);
2116 2116 rsmresource_t *res;
2117 2117 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2118 2118
2119 2119 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2120 2120
2121 2121 flag = flag; cred = cred;
2122 2122
2123 2123 if (otyp != OTYP_CHR)
2124 2124 return (EINVAL);
2125 2125
2126 2126 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2127 2127
2128 2128 /*
2129 2129 * At this point we are the last reference to the resource.
2130 2130 * Free resource number from resource table.
2131 2131 * It's ok to remove number before we free the segment.
2132 2132 * We need to lock the resource to protect against remote calls.
2133 2133 */
2134 2134 if (rnum == RSM_DRIVER_MINOR ||
2135 2135 (res = rsmresource_free(rnum)) == NULL) {
2136 2136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2137 2137 return (DDI_SUCCESS);
2138 2138 }
2139 2139
2140 2140 switch (res->rsmrc_type) {
2141 2141 case RSM_RESOURCE_EXPORT_SEGMENT:
2142 2142 case RSM_RESOURCE_IMPORT_SEGMENT:
2143 2143 rsmseg_close((rsmseg_t *)res, 0);
2144 2144 break;
2145 2145 case RSM_RESOURCE_BAR:
2146 2146 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2147 2147 break;
2148 2148 default:
2149 2149 break;
2150 2150 }
2151 2151
2152 2152 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2153 2153
2154 2154 return (DDI_SUCCESS);
2155 2155 }
2156 2156
2157 2157 /*
2158 2158 * rsm_inc_pgcnt
2159 2159 *
2160 2160 * Description: increment rsm page counter.
2161 2161 *
2162 2162 * Parameters: pgcnt_t pnum; number of pages to be used
2163 2163 *
2164 2164 * Returns: RSM_SUCCESS if memory limit not exceeded
2165 2165 * ENOSPC if memory limit exceeded. In this case, the
2166 2166 * page counter remains unchanged.
2167 2167 *
2168 2168 */
2169 2169 static int
2170 2170 rsm_inc_pgcnt(pgcnt_t pnum)
2171 2171 {
2172 2172 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2173 2173 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2174 2174 return (RSM_SUCCESS);
2175 2175 }
2176 2176
2177 2177 mutex_enter(&rsm_pgcnt_lock);
2178 2178
2179 2179 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2180 2180 /* ensure that limits have not been exceeded */
2181 2181 mutex_exit(&rsm_pgcnt_lock);
2182 2182 return (RSMERR_INSUFFICIENT_MEM);
2183 2183 }
2184 2184
2185 2185 rsm_pgcnt += pnum;
2186 2186 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2187 2187 rsm_pgcnt));
2188 2188 mutex_exit(&rsm_pgcnt_lock);
2189 2189
2190 2190 return (RSM_SUCCESS);
2191 2191 }
2192 2192
2193 2193 /*
2194 2194 * rsm_dec_pgcnt
2195 2195 *
2196 2196 * Description: decrement rsm page counter.
2197 2197 *
2198 2198 * Parameters: pgcnt_t pnum; number of pages freed
2199 2199 *
2200 2200 */
2201 2201 static void
2202 2202 rsm_dec_pgcnt(pgcnt_t pnum)
2203 2203 {
2204 2204 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2205 2205
2206 2206 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2207 2207 return;
2208 2208 }
2209 2209
2210 2210 mutex_enter(&rsm_pgcnt_lock);
2211 2211 ASSERT(rsm_pgcnt >= pnum);
2212 2212 rsm_pgcnt -= pnum;
2213 2213 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2214 2214 rsm_pgcnt));
2215 2215 mutex_exit(&rsm_pgcnt_lock);
2216 2216 }
2217 2217
2218 2218 static struct umem_callback_ops rsm_as_ops = {
2219 2219 UMEM_CALLBACK_VERSION, /* version number */
2220 2220 rsm_export_force_destroy,
2221 2221 };
2222 2222
2223 2223 static int
2224 2224 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2225 2225 proc_t *procp)
2226 2226 {
2227 2227 int error = RSM_SUCCESS;
2228 2228 ulong_t pnum;
2229 2229 struct umem_callback_ops *callbackops = &rsm_as_ops;
2230 2230
2231 2231 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2232 2232
2233 2233 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2234 2234
2235 2235 /*
2236 2236 * Make sure vaddr and len are aligned on a page boundary
2237 2237 */
2238 2238 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2239 2239 return (RSMERR_BAD_ADDR);
2240 2240 }
2241 2241
2242 2242 if (len & (PAGESIZE - 1)) {
2243 2243 return (RSMERR_BAD_LENGTH);
2244 2244 }
2245 2245
2246 2246 /*
2247 2247 * Find number of pages
2248 2248 */
2249 2249 pnum = btopr(len);
2250 2250 error = rsm_inc_pgcnt(pnum);
2251 2251 if (error != RSM_SUCCESS) {
2252 2252 DBG_PRINTF((category, RSM_ERR,
2253 2253 "rsm_bind_pages:mem limit exceeded\n"));
2254 2254 return (RSMERR_INSUFFICIENT_MEM);
2255 2255 }
2256 2256
2257 2257 error = umem_lockmemory(vaddr, len,
2258 2258 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2259 2259 cookie,
2260 2260 callbackops, procp);
2261 2261
2262 2262 if (error) {
2263 2263 rsm_dec_pgcnt(pnum);
2264 2264 DBG_PRINTF((category, RSM_ERR,
2265 2265 "rsm_bind_pages:ddi_umem_lock failed\n"));
2266 2266 /*
2267 2267 * ddi_umem_lock, in the case of failure, returns one of
2268 2268 * the following three errors. These are translated into
2269 2269 * the RSMERR namespace and returned.
2270 2270 */
2271 2271 if (error == EFAULT)
2272 2272 return (RSMERR_BAD_ADDR);
2273 2273 else if (error == EACCES)
2274 2274 return (RSMERR_PERM_DENIED);
2275 2275 else
2276 2276 return (RSMERR_INSUFFICIENT_MEM);
2277 2277 }
2278 2278
2279 2279 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2280 2280
2281 2281 return (error);
2282 2282
2283 2283 }
2284 2284
2285 2285 static int
2286 2286 rsm_unbind_pages(rsmseg_t *seg)
2287 2287 {
2288 2288 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2289 2289
2290 2290 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2291 2291
2292 2292 ASSERT(rsmseglock_held(seg));
2293 2293
2294 2294 if (seg->s_cookie != NULL) {
2295 2295 /* unlock address range */
2296 2296 ddi_umem_unlock(seg->s_cookie);
2297 2297 rsm_dec_pgcnt(btopr(seg->s_len));
2298 2298 seg->s_cookie = NULL;
2299 2299 }
2300 2300
2301 2301 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2302 2302
2303 2303 return (RSM_SUCCESS);
2304 2304 }
2305 2305
2306 2306
2307 2307 static int
2308 2308 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2309 2309 {
2310 2310 int e;
2311 2311 adapter_t *adapter;
2312 2312 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2313 2313
2314 2314 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2315 2315
2316 2316 adapter = rsm_getadapter(msg, mode);
2317 2317 if (adapter == NULL) {
2318 2318 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2319 2319 "rsm_bind done:no adapter\n"));
2320 2320 return (RSMERR_CTLR_NOT_PRESENT);
2321 2321 }
2322 2322
2323 2323 /* lock address range */
2324 2324 if (msg->vaddr == NULL) {
2325 2325 rsmka_release_adapter(adapter);
2326 2326 DBG_PRINTF((category, RSM_ERR,
2327 2327 "rsm: rsm_bind done: invalid vaddr\n"));
2328 2328 return (RSMERR_BAD_ADDR);
2329 2329 }
2330 2330 if (msg->len <= 0) {
2331 2331 rsmka_release_adapter(adapter);
2332 2332 DBG_PRINTF((category, RSM_ERR,
2333 2333 "rsm_bind: invalid length\n"));
2334 2334 return (RSMERR_BAD_LENGTH);
2335 2335 }
2336 2336
2337 2337 /* Lock segment */
2338 2338 rsmseglock_acquire(seg);
2339 2339
2340 2340 while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2341 2341 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2342 2342 DBG_PRINTF((category, RSM_DEBUG,
2343 2343 "rsm_bind done: cv_wait INTERRUPTED"));
2344 2344 rsmka_release_adapter(adapter);
2345 2345 rsmseglock_release(seg);
2346 2346 return (RSMERR_INTERRUPTED);
2347 2347 }
2348 2348 }
2349 2349
2350 2350 ASSERT(seg->s_state == RSM_STATE_NEW);
2351 2351
2352 2352 ASSERT(seg->s_cookie == NULL);
2353 2353
2354 2354 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2355 2355 if (e == RSM_SUCCESS) {
2356 2356 seg->s_flags |= RSM_USER_MEMORY;
2357 2357 if (msg->perm & RSM_ALLOW_REBIND) {
2358 2358 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2359 2359 }
2360 2360 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2361 2361 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2362 2362 }
2363 2363 seg->s_region.r_vaddr = msg->vaddr;
2364 2364 /*
2365 2365 * Set the s_pid value in the segment structure. This is used
2366 2366 * to identify exported segments belonging to a particular
2367 2367 * process so that when the process exits, these segments can
2368 2368 * be unlocked forcefully even if rsm_close is not called on
2369 2369 * process exit since there maybe other processes referencing
2370 2370 * them (for example on a fork or exec).
2371 2371 * The s_pid value is also used to authenticate the process
2372 2372 * doing a publish or unpublish on the export segment. Only
2373 2373 * the creator of the export segment has a right to do a
2374 2374 * publish or unpublish and unbind on the segment.
2375 2375 */
2376 2376 seg->s_pid = ddi_get_pid();
2377 2377 seg->s_len = msg->len;
2378 2378 seg->s_state = RSM_STATE_BIND;
2379 2379 seg->s_adapter = adapter;
2380 2380 seg->s_proc = curproc;
2381 2381 } else {
2382 2382 rsmka_release_adapter(adapter);
2383 2383 DBG_PRINTF((category, RSM_WARNING,
2384 2384 "unable to lock down pages\n"));
2385 2385 }
2386 2386
2387 2387 msg->rnum = seg->s_minor;
2388 2388 /* Unlock segment */
2389 2389 rsmseglock_release(seg);
2390 2390
2391 2391 if (e == RSM_SUCCESS) {
2392 2392 /* copyout the resource number */
2393 2393 #ifdef _MULTI_DATAMODEL
2394 2394 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2395 2395 rsm_ioctlmsg32_t msg32;
2396 2396
2397 2397 msg32.rnum = msg->rnum;
2398 2398 if (ddi_copyout((caddr_t)&msg32.rnum,
2399 2399 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2400 2400 sizeof (minor_t), mode)) {
2401 2401 rsmka_release_adapter(adapter);
2402 2402 e = RSMERR_BAD_ADDR;
2403 2403 }
2404 2404 }
2405 2405 #endif
2406 2406 if (ddi_copyout((caddr_t)&msg->rnum,
2407 2407 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2408 2408 sizeof (minor_t), mode)) {
2409 2409 rsmka_release_adapter(adapter);
2410 2410 e = RSMERR_BAD_ADDR;
2411 2411 }
2412 2412 }
2413 2413
2414 2414 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2415 2415
2416 2416 return (e);
2417 2417 }
2418 2418
2419 2419 static void
2420 2420 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2421 2421 rsm_memseg_id_t ex_segid,
2422 2422 ddi_umem_cookie_t cookie)
2423 2423
2424 2424 {
2425 2425 rsmresource_t *p = NULL;
2426 2426 rsmhash_table_t *rhash = &rsm_import_segs;
2427 2427 uint_t index;
2428 2428
2429 2429 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2430 2430 "rsm_remap_local_importers enter\n"));
2431 2431
2432 2432 index = rsmhash(ex_segid);
2433 2433
2434 2434 rw_enter(&rhash->rsmhash_rw, RW_READER);
2435 2435
2436 2436 p = rsmhash_getbkt(rhash, index);
2437 2437
2438 2438 for (; p; p = p->rsmrc_next) {
2439 2439 rsmseg_t *seg = (rsmseg_t *)p;
2440 2440 rsmseglock_acquire(seg);
2441 2441 /*
2442 2442 * Change the s_cookie value of only the local importers
2443 2443 * which have been mapped (in state RSM_STATE_ACTIVE).
2444 2444 * Note that there is no need to change the s_cookie value
2445 2445 * if the imported segment is in RSM_STATE_MAPPING since
2446 2446 * eventually the s_cookie will be updated via the mapping
2447 2447 * functionality.
2448 2448 */
2449 2449 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2450 2450 (seg->s_state == RSM_STATE_ACTIVE)) {
2451 2451 seg->s_cookie = cookie;
2452 2452 }
2453 2453 rsmseglock_release(seg);
2454 2454 }
2455 2455 rw_exit(&rhash->rsmhash_rw);
2456 2456
2457 2457 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2458 2458 "rsm_remap_local_importers done\n"));
2459 2459 }
2460 2460
2461 2461 static int
2462 2462 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2463 2463 {
2464 2464 int e;
2465 2465 adapter_t *adapter;
2466 2466 ddi_umem_cookie_t cookie;
2467 2467 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2468 2468
2469 2469 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2470 2470
2471 2471 /* Check for permissions to rebind */
2472 2472 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2473 2473 return (RSMERR_REBIND_NOT_ALLOWED);
2474 2474 }
2475 2475
2476 2476 if (seg->s_pid != ddi_get_pid() &&
2477 2477 ddi_get_pid() != 0) {
2478 2478 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2479 2479 return (RSMERR_NOT_CREATOR);
2480 2480 }
2481 2481
2482 2482 /*
2483 2483 * We will not be allowing partial rebind and hence length passed
2484 2484 * in must be same as segment length
2485 2485 */
2486 2486 if (msg->vaddr == NULL) {
2487 2487 DBG_PRINTF((category, RSM_ERR,
2488 2488 "rsm_rebind done: null msg->vaddr\n"));
2489 2489 return (RSMERR_BAD_ADDR);
2490 2490 }
2491 2491 if (msg->len != seg->s_len) {
2492 2492 DBG_PRINTF((category, RSM_ERR,
2493 2493 "rsm_rebind: invalid length\n"));
2494 2494 return (RSMERR_BAD_LENGTH);
2495 2495 }
2496 2496
2497 2497 /* Lock segment */
2498 2498 rsmseglock_acquire(seg);
2499 2499
2500 2500 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2501 2501 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2502 2502 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2503 2503 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2504 2504 rsmseglock_release(seg);
2505 2505 DBG_PRINTF((category, RSM_DEBUG,
2506 2506 "rsm_rebind done: cv_wait INTERRUPTED"));
2507 2507 return (RSMERR_INTERRUPTED);
2508 2508 }
2509 2509 }
2510 2510
2511 2511 /* verify segment state */
2512 2512 if ((seg->s_state != RSM_STATE_BIND) &&
2513 2513 (seg->s_state != RSM_STATE_EXPORT)) {
2514 2514 /* Unlock segment */
2515 2515 rsmseglock_release(seg);
2516 2516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2517 2517 "rsm_rebind done: invalid state\n"));
2518 2518 return (RSMERR_BAD_SEG_HNDL);
2519 2519 }
2520 2520
2521 2521 ASSERT(seg->s_cookie != NULL);
2522 2522
2523 2523 if (msg->vaddr == seg->s_region.r_vaddr) {
2524 2524 rsmseglock_release(seg);
2525 2525 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2526 2526 return (RSM_SUCCESS);
2527 2527 }
2528 2528
2529 2529 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2530 2530 if (e == RSM_SUCCESS) {
2531 2531 struct buf *xbuf;
2532 2532 dev_t sdev = 0;
2533 2533 rsm_memory_local_t mem;
2534 2534
2535 2535 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2536 2536 sdev, 0, NULL, DDI_UMEM_SLEEP);
2537 2537 ASSERT(xbuf != NULL);
2538 2538
2539 2539 mem.ms_type = RSM_MEM_BUF;
2540 2540 mem.ms_bp = xbuf;
2541 2541
2542 2542 adapter = seg->s_adapter;
2543 2543 e = adapter->rsmpi_ops->rsm_rebind(
2544 2544 seg->s_handle.out, 0, &mem,
2545 2545 RSM_RESOURCE_DONTWAIT, NULL);
2546 2546
2547 2547 if (e == RSM_SUCCESS) {
2548 2548 /*
2549 2549 * unbind the older pages, and unload local importers;
2550 2550 * but don't disconnect importers
2551 2551 */
2552 2552 (void) rsm_unbind_pages(seg);
2553 2553 seg->s_cookie = cookie;
2554 2554 seg->s_region.r_vaddr = msg->vaddr;
2555 2555 rsm_remap_local_importers(my_nodeid, seg->s_segid,
2556 2556 cookie);
2557 2557 } else {
2558 2558 /*
2559 2559 * Unbind the pages associated with "cookie" by the
2560 2560 * rsm_bind_pages calls prior to this. This is
2561 2561 * similar to what is done in the rsm_unbind_pages
2562 2562 * routine for the seg->s_cookie.
2563 2563 */
2564 2564 ddi_umem_unlock(cookie);
2565 2565 rsm_dec_pgcnt(btopr(msg->len));
2566 2566 DBG_PRINTF((category, RSM_ERR,
2567 2567 "rsm_rebind failed with %d\n", e));
2568 2568 }
2569 2569 /*
2570 2570 * At present there is no dependency on the existence of xbuf.
2571 2571 * So we can free it here. If in the future this changes, it can
2572 2572 * be freed sometime during the segment destroy.
2573 2573 */
2574 2574 freerbuf(xbuf);
2575 2575 }
2576 2576
2577 2577 /* Unlock segment */
2578 2578 rsmseglock_release(seg);
2579 2579
2580 2580 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2581 2581
2582 2582 return (e);
2583 2583 }
2584 2584
2585 2585 static int
2586 2586 rsm_unbind(rsmseg_t *seg)
2587 2587 {
2588 2588 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2589 2589
2590 2590 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2591 2591
2592 2592 rsmseglock_acquire(seg);
2593 2593
2594 2594 /* verify segment state */
2595 2595 if ((seg->s_state != RSM_STATE_BIND) &&
2596 2596 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2597 2597 rsmseglock_release(seg);
2598 2598 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2599 2599 "rsm_unbind: invalid state\n"));
2600 2600 return (RSMERR_BAD_SEG_HNDL);
2601 2601 }
2602 2602
2603 2603 /* unlock current range */
2604 2604 (void) rsm_unbind_pages(seg);
2605 2605
2606 2606 if (seg->s_state == RSM_STATE_BIND) {
2607 2607 seg->s_state = RSM_STATE_NEW;
2608 2608 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2609 2609 seg->s_state = RSM_STATE_NEW_QUIESCED;
2610 2610 }
2611 2611
2612 2612 rsmseglock_release(seg);
2613 2613
2614 2614 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2615 2615
2616 2616 return (RSM_SUCCESS);
2617 2617 }
2618 2618
2619 2619 /* **************************** Exporter Access List Management ******* */
2620 2620 static void
2621 2621 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2622 2622 {
2623 2623 int acl_sz;
2624 2624 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2625 2625
2626 2626 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2627 2627
2628 2628 /* acl could be NULL */
2629 2629
2630 2630 if (acl != NULL && acl_len > 0) {
2631 2631 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2632 2632 kmem_free((void *)acl, acl_sz);
2633 2633 }
2634 2634
2635 2635 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2636 2636 }
2637 2637
2638 2638 static void
2639 2639 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2640 2640 {
2641 2641 int acl_sz;
2642 2642 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2643 2643
2644 2644 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2645 2645
2646 2646 if (acl != NULL && acl_len > 0) {
2647 2647 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2648 2648 kmem_free((void *)acl, acl_sz);
2649 2649 }
2650 2650
2651 2651 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2652 2652
2653 2653 }
2654 2654
2655 2655 static int
2656 2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2657 2657 rsmapi_access_entry_t **list, int *len, int loopback)
2658 2658 {
2659 2659 rsmapi_access_entry_t *acl;
2660 2660 int acl_len;
2661 2661 int i;
2662 2662 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2663 2663
2664 2664 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2665 2665
2666 2666 *len = 0;
2667 2667 *list = NULL;
2668 2668
2669 2669 acl_len = msg->acl_len;
2670 2670 if ((loopback && acl_len > 1) || (acl_len < 0) ||
2671 2671 (acl_len > MAX_NODES)) {
2672 2672 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2673 2673 "rsmacl_build done: acl invalid\n"));
2674 2674 return (RSMERR_BAD_ACL);
2675 2675 }
2676 2676
2677 2677 if (acl_len > 0 && acl_len <= MAX_NODES) {
2678 2678 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2679 2679
2680 2680 acl = kmem_alloc(acl_size, KM_SLEEP);
2681 2681
2682 2682 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2683 2683 acl_size, mode)) {
2684 2684 kmem_free((void *) acl, acl_size);
2685 2685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2686 2686 "rsmacl_build done: BAD_ADDR\n"));
2687 2687 return (RSMERR_BAD_ADDR);
2688 2688 }
2689 2689
2690 2690 /*
2691 2691 * Verify access list
2692 2692 */
2693 2693 for (i = 0; i < acl_len; i++) {
2694 2694 if (acl[i].ae_node > MAX_NODES ||
2695 2695 (loopback && (acl[i].ae_node != my_nodeid)) ||
2696 2696 acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2697 2697 /* invalid entry */
2698 2698 kmem_free((void *) acl, acl_size);
2699 2699 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2700 2700 "rsmacl_build done: EINVAL\n"));
2701 2701 return (RSMERR_BAD_ACL);
2702 2702 }
2703 2703 }
2704 2704
2705 2705 *len = acl_len;
2706 2706 *list = acl;
2707 2707 }
2708 2708
2709 2709 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2710 2710
2711 2711 return (DDI_SUCCESS);
2712 2712 }
2713 2713
2714 2714 static int
2715 2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2716 2716 int acl_len, adapter_t *adapter)
2717 2717 {
2718 2718 rsm_access_entry_t *acl;
2719 2719 rsm_addr_t hwaddr;
2720 2720 int i;
2721 2721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2722 2722
2723 2723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2724 2724
2725 2725 if (src != NULL) {
2726 2726 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2727 2727 acl = kmem_alloc(acl_size, KM_SLEEP);
2728 2728
2729 2729 /*
2730 2730 * translate access list
2731 2731 */
2732 2732 for (i = 0; i < acl_len; i++) {
2733 2733 if (src[i].ae_node == my_nodeid) {
2734 2734 acl[i].ae_addr = adapter->hwaddr;
2735 2735 } else {
2736 2736 hwaddr = get_remote_hwaddr(adapter,
2737 2737 src[i].ae_node);
2738 2738 if ((int64_t)hwaddr < 0) {
2739 2739 /* invalid hwaddr */
2740 2740 kmem_free((void *) acl, acl_size);
2741 2741 DBG_PRINTF((category,
2742 2742 RSM_DEBUG_VERBOSE,
2743 2743 "rsmpiacl_create done:"
2744 2744 "EINVAL hwaddr\n"));
2745 2745 return (RSMERR_INTERNAL_ERROR);
2746 2746 }
2747 2747 acl[i].ae_addr = hwaddr;
2748 2748 }
2749 2749 /* rsmpi understands only RSM_PERM_XXXX */
2750 2750 acl[i].ae_permission =
2751 2751 src[i].ae_permission & RSM_PERM_RDWR;
2752 2752 }
2753 2753 *dest = acl;
2754 2754 } else {
2755 2755 *dest = NULL;
2756 2756 }
2757 2757
2758 2758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2759 2759
2760 2760 return (RSM_SUCCESS);
2761 2761 }
2762 2762
2763 2763 static int
2764 2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2765 2765 rsmipc_reply_t *reply)
2766 2766 {
2767 2767
2768 2768 int i;
2769 2769 rsmseg_t *seg;
2770 2770 rsm_memseg_id_t key = req->rsmipc_key;
2771 2771 rsm_permission_t perm = req->rsmipc_perm;
2772 2772 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2773 2773
2774 2774 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2775 2775 "rsmsegacl_validate enter\n"));
2776 2776
2777 2777 /*
2778 2778 * Find segment and grab its lock. The reason why we grab the segment
2779 2779 * lock in side the search is to avoid the race when the segment is
2780 2780 * being deleted and we already have a pointer to it.
2781 2781 */
2782 2782 seg = rsmexport_lookup(key);
2783 2783 if (!seg) {
2784 2784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2785 2785 "rsmsegacl_validate done: %u ENXIO\n", key));
2786 2786 return (RSMERR_SEG_NOT_PUBLISHED);
2787 2787 }
2788 2788
2789 2789 ASSERT(rsmseglock_held(seg));
2790 2790 ASSERT(seg->s_state == RSM_STATE_EXPORT);
2791 2791
2792 2792 /*
2793 2793 * We implement a 2-level protection scheme.
2794 2794 * First, we check if local/remote host has access rights.
2795 2795 * Second, we check if the user has access rights.
2796 2796 *
2797 2797 * This routine only validates the rnode access_list
2798 2798 */
2799 2799 if (seg->s_acl_len > 0) {
2800 2800 /*
2801 2801 * Check host access list
2802 2802 */
2803 2803 ASSERT(seg->s_acl != NULL);
2804 2804 for (i = 0; i < seg->s_acl_len; i++) {
2805 2805 if (seg->s_acl[i].ae_node == rnode) {
2806 2806 perm &= seg->s_acl[i].ae_permission;
2807 2807 goto found;
2808 2808 }
2809 2809 }
2810 2810 /* rnode is not found in the list */
2811 2811 rsmseglock_release(seg);
2812 2812 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2813 2813 "rsmsegacl_validate done: EPERM\n"));
2814 2814 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2815 2815 } else {
2816 2816 /* use default owner creation umask */
2817 2817 perm &= seg->s_mode;
2818 2818 }
2819 2819
2820 2820 found:
2821 2821 /* update perm for this node */
2822 2822 reply->rsmipc_mode = perm;
2823 2823 reply->rsmipc_uid = seg->s_uid;
2824 2824 reply->rsmipc_gid = seg->s_gid;
2825 2825 reply->rsmipc_segid = seg->s_segid;
2826 2826 reply->rsmipc_seglen = seg->s_len;
2827 2827
2828 2828 /*
2829 2829 * Perm of requesting node is valid; source will validate user
2830 2830 */
2831 2831 rsmseglock_release(seg);
2832 2832
2833 2833 /*
2834 2834 * Add the importer to the list right away, if connect fails
2835 2835 * the importer will ask the exporter to remove it.
2836 2836 */
2837 2837 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2838 2838 req->rsmipc_segment_cookie);
2839 2839
2840 2840 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2841 2841
2842 2842 return (RSM_SUCCESS);
2843 2843 }
2844 2844
2845 2845
2846 2846 /* ************************** Exporter Calls ************************* */
2847 2847
2848 2848 static int
2849 2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2850 2850 {
2851 2851 int e;
2852 2852 int acl_len;
2853 2853 rsmapi_access_entry_t *acl;
2854 2854 rsm_access_entry_t *rsmpi_acl;
2855 2855 rsm_memory_local_t mem;
2856 2856 struct buf *xbuf;
2857 2857 dev_t sdev = 0;
2858 2858 adapter_t *adapter;
2859 2859 rsm_memseg_id_t segment_id = 0;
2860 2860 int loopback_flag = 0;
2861 2861 int create_flags = 0;
2862 2862 rsm_resource_callback_t callback_flag;
2863 2863 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2864 2864
2865 2865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2866 2866
2867 2867 if (seg->s_adapter == &loopback_adapter)
2868 2868 loopback_flag = 1;
2869 2869
2870 2870 if (seg->s_pid != ddi_get_pid() &&
2871 2871 ddi_get_pid() != 0) {
2872 2872 DBG_PRINTF((category, RSM_ERR,
2873 2873 "rsm_publish: Not creator\n"));
2874 2874 return (RSMERR_NOT_CREATOR);
2875 2875 }
2876 2876
2877 2877 /*
2878 2878 * Get per node access list
2879 2879 */
2880 2880 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2881 2881 if (e != DDI_SUCCESS) {
2882 2882 DBG_PRINTF((category, RSM_ERR,
2883 2883 "rsm_publish done: rsmacl_build failed\n"));
2884 2884 return (e);
2885 2885 }
2886 2886
2887 2887 /*
2888 2888 * The application provided msg->key is used for resolving a
2889 2889 * segment id according to the following:
2890 2890 * key = 0 Kernel Agent selects the segment id
2891 2891 * key <= RSM_DLPI_ID_END Reserved for system usage except
2892 2892 * RSMLIB range
2893 2893 * key < RSM_USER_APP_ID_BASE segment id = key
2894 2894 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2895 2895 *
2896 2896 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2897 2897 * overflows to zero after 0x80000000 allocations.
2898 2898 * An algorithm is needed which allows reinitialization and provides
2899 2899 * for reallocation after overflow. For now, ENOMEM is returned
2900 2900 * once the overflow condition has occurred.
2901 2901 */
2902 2902 if (msg->key == 0) {
2903 2903 mutex_enter(&rsm_lock);
2904 2904 segment_id = rsm_nextavail_segmentid;
2905 2905 if (segment_id != 0) {
2906 2906 rsm_nextavail_segmentid++;
2907 2907 mutex_exit(&rsm_lock);
2908 2908 } else {
2909 2909 mutex_exit(&rsm_lock);
2910 2910 DBG_PRINTF((category, RSM_ERR,
2911 2911 "rsm_publish done: no more keys avlbl\n"));
2912 2912 return (RSMERR_INSUFFICIENT_RESOURCES);
2913 2913 }
2914 2914 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2915 2915 /* range reserved for internal use by base/ndi libraries */
2916 2916 segment_id = msg->key;
2917 2917 else if (msg->key <= RSM_DLPI_ID_END)
2918 2918 return (RSMERR_RESERVED_SEGID);
2919 2919 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2920 2920 segment_id = msg->key;
2921 2921 else {
2922 2922 DBG_PRINTF((category, RSM_ERR,
2923 2923 "rsm_publish done: invalid key %u\n", msg->key));
2924 2924 return (RSMERR_RESERVED_SEGID);
2925 2925 }
2926 2926
2927 2927 /* Add key to exportlist; The segment lock is held on success */
2928 2928 e = rsmexport_add(seg, segment_id);
2929 2929 if (e) {
2930 2930 rsmacl_free(acl, acl_len);
2931 2931 DBG_PRINTF((category, RSM_ERR,
2932 2932 "rsm_publish done: export_add failed: %d\n", e));
2933 2933 return (e);
2934 2934 }
2935 2935
2936 2936 seg->s_segid = segment_id;
2937 2937
2938 2938 if ((seg->s_state != RSM_STATE_BIND) &&
2939 2939 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2940 2940 /* state changed since then, free acl and return */
2941 2941 rsmseglock_release(seg);
2942 2942 rsmexport_rm(seg);
2943 2943 rsmacl_free(acl, acl_len);
2944 2944 DBG_PRINTF((category, RSM_ERR,
2945 2945 "rsm_publish done: segment in wrong state: %d\n",
2946 2946 seg->s_state));
2947 2947 return (RSMERR_BAD_SEG_HNDL);
2948 2948 }
2949 2949
2950 2950 /*
2951 2951 * If this is for a local memory handle and permissions are zero,
2952 2952 * then the surrogate segment is very large and we want to skip
2953 2953 * allocation of DVMA space.
2954 2954 *
2955 2955 * Careful! If the user didn't use an ACL list, acl will be a NULL
2956 2956 * pointer. Check that before dereferencing it.
2957 2957 */
2958 2958 if (acl != (rsmapi_access_entry_t *)NULL) {
2959 2959 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2960 2960 goto skipdriver;
2961 2961 }
2962 2962
2963 2963 /* create segment */
2964 2964 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2965 2965 sdev, 0, NULL, DDI_UMEM_SLEEP);
2966 2966 ASSERT(xbuf != NULL);
2967 2967
2968 2968 mem.ms_type = RSM_MEM_BUF;
2969 2969 mem.ms_bp = xbuf;
2970 2970
2971 2971 /* This call includes a bind operations */
2972 2972
2973 2973 adapter = seg->s_adapter;
2974 2974 /*
2975 2975 * create a acl list with hwaddr for RSMPI publish
2976 2976 */
2977 2977 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2978 2978
2979 2979 if (e != RSM_SUCCESS) {
2980 2980 rsmseglock_release(seg);
2981 2981 rsmexport_rm(seg);
2982 2982 rsmacl_free(acl, acl_len);
2983 2983 freerbuf(xbuf);
2984 2984 DBG_PRINTF((category, RSM_ERR,
2985 2985 "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2986 2986 return (e);
2987 2987 }
2988 2988
2989 2989 if (seg->s_state == RSM_STATE_BIND) {
2990 2990 /* create segment */
2991 2991
2992 2992 /* This call includes a bind operations */
2993 2993
2994 2994 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2995 2995 create_flags = RSM_ALLOW_UNBIND_REBIND;
2996 2996 }
2997 2997
2998 2998 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2999 2999 callback_flag = RSM_RESOURCE_DONTWAIT;
3000 3000 } else {
3001 3001 callback_flag = RSM_RESOURCE_SLEEP;
3002 3002 }
3003 3003
3004 3004 e = adapter->rsmpi_ops->rsm_seg_create(
3005 3005 adapter->rsmpi_handle,
3006 3006 &seg->s_handle.out, seg->s_len,
3007 3007 create_flags, &mem,
3008 3008 callback_flag, NULL);
3009 3009 /*
3010 3010 * At present there is no dependency on the existence of xbuf.
3011 3011 * So we can free it here. If in the future this changes, it can
3012 3012 * be freed sometime during the segment destroy.
3013 3013 */
3014 3014 freerbuf(xbuf);
3015 3015
3016 3016 if (e != RSM_SUCCESS) {
3017 3017 rsmseglock_release(seg);
3018 3018 rsmexport_rm(seg);
3019 3019 rsmacl_free(acl, acl_len);
3020 3020 rsmpiacl_free(rsmpi_acl, acl_len);
3021 3021 DBG_PRINTF((category, RSM_ERR,
3022 3022 "rsm_publish done: export_create failed: %d\n", e));
3023 3023 /*
3024 3024 * The following assertion ensures that the two errors
3025 3025 * related to the length and its alignment do not occur
3026 3026 * since they have been checked during export_create
3027 3027 */
3028 3028 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3029 3029 e != RSMERR_BAD_LENGTH);
3030 3030 if (e == RSMERR_NOT_MEM)
3031 3031 e = RSMERR_INSUFFICIENT_MEM;
3032 3032
3033 3033 return (e);
3034 3034 }
3035 3035 /* export segment, this should create an IMMU mapping */
3036 3036 e = adapter->rsmpi_ops->rsm_publish(
3037 3037 seg->s_handle.out,
3038 3038 rsmpi_acl, acl_len,
3039 3039 seg->s_segid,
3040 3040 RSM_RESOURCE_DONTWAIT, NULL);
3041 3041
3042 3042 if (e != RSM_SUCCESS) {
3043 3043 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3044 3044 rsmseglock_release(seg);
3045 3045 rsmexport_rm(seg);
3046 3046 rsmacl_free(acl, acl_len);
3047 3047 rsmpiacl_free(rsmpi_acl, acl_len);
3048 3048 DBG_PRINTF((category, RSM_ERR,
3049 3049 "rsm_publish done: export_publish failed: %d\n",
3050 3050 e));
3051 3051 return (e);
3052 3052 }
3053 3053 }
3054 3054
3055 3055 seg->s_acl_in = rsmpi_acl;
3056 3056
3057 3057 skipdriver:
3058 3058 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3059 3059 seg->s_acl_len = acl_len;
3060 3060 seg->s_acl = acl;
3061 3061
3062 3062 if (seg->s_state == RSM_STATE_BIND) {
3063 3063 seg->s_state = RSM_STATE_EXPORT;
3064 3064 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3065 3065 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3066 3066 cv_broadcast(&seg->s_cv);
3067 3067 }
3068 3068
3069 3069 rsmseglock_release(seg);
3070 3070
3071 3071 /*
3072 3072 * If the segment id was solicited, then return it in
3073 3073 * the original incoming message.
3074 3074 */
3075 3075 if (msg->key == 0) {
3076 3076 msg->key = segment_id;
3077 3077 #ifdef _MULTI_DATAMODEL
3078 3078 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3079 3079 rsm_ioctlmsg32_t msg32;
3080 3080
3081 3081 msg32.key = msg->key;
3082 3082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3083 3083 "rsm_publish done\n"));
3084 3084 return (ddi_copyout((caddr_t)&msg32,
3085 3085 (caddr_t)dataptr, sizeof (msg32), mode));
3086 3086 }
3087 3087 #endif
3088 3088 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3089 3089 "rsm_publish done\n"));
3090 3090 return (ddi_copyout((caddr_t)msg,
3091 3091 (caddr_t)dataptr, sizeof (*msg), mode));
3092 3092 }
3093 3093
3094 3094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3095 3095 return (DDI_SUCCESS);
3096 3096 }
3097 3097
3098 3098 /*
3099 3099 * This function modifies the access control list of an already published
3100 3100 * segment. There is no effect on import segments which are already
3101 3101 * connected.
3102 3102 */
3103 3103 static int
3104 3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3105 3105 {
3106 3106 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
3107 3107 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
3108 3108 int new_acl_len, old_acl_len, tmp_acl_len;
3109 3109 int e, i;
3110 3110 adapter_t *adapter;
3111 3111 int loopback_flag = 0;
3112 3112 rsm_memseg_id_t key;
3113 3113 rsm_permission_t permission;
3114 3114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3115 3115
3116 3116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3117 3117
3118 3118 if ((seg->s_state != RSM_STATE_EXPORT) &&
3119 3119 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3120 3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3121 3121 return (RSMERR_SEG_NOT_PUBLISHED);
3122 3122
3123 3123 if (seg->s_pid != ddi_get_pid() &&
3124 3124 ddi_get_pid() != 0) {
3125 3125 DBG_PRINTF((category, RSM_ERR,
3126 3126 "rsm_republish: Not owner\n"));
3127 3127 return (RSMERR_NOT_CREATOR);
3128 3128 }
3129 3129
3130 3130 if (seg->s_adapter == &loopback_adapter)
3131 3131 loopback_flag = 1;
3132 3132
3133 3133 /*
3134 3134 * Build new list first
3135 3135 */
3136 3136 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3137 3137 if (e) {
3138 3138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3139 3139 "rsm_republish done: rsmacl_build failed %d", e));
3140 3140 return (e);
3141 3141 }
3142 3142
3143 3143 /* Lock segment */
3144 3144 rsmseglock_acquire(seg);
3145 3145 /*
3146 3146 * a republish is in progress - REPUBLISH message is being
3147 3147 * sent to the importers so wait for it to complete OR
3148 3148 * wait till DR completes
3149 3149 */
3150 3150 while (((seg->s_state == RSM_STATE_EXPORT) &&
3151 3151 (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3152 3152 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3153 3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3154 3154 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3155 3155 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3156 3156 "rsm_republish done: cv_wait INTERRUPTED"));
3157 3157 rsmseglock_release(seg);
3158 3158 rsmacl_free(new_acl, new_acl_len);
3159 3159 return (RSMERR_INTERRUPTED);
3160 3160 }
3161 3161 }
3162 3162
3163 3163 /* recheck if state is valid */
3164 3164 if (seg->s_state != RSM_STATE_EXPORT) {
3165 3165 rsmseglock_release(seg);
3166 3166 rsmacl_free(new_acl, new_acl_len);
3167 3167 return (RSMERR_SEG_NOT_PUBLISHED);
3168 3168 }
3169 3169
3170 3170 key = seg->s_key;
3171 3171 old_acl = seg->s_acl;
3172 3172 old_acl_len = seg->s_acl_len;
3173 3173
3174 3174 seg->s_acl = new_acl;
3175 3175 seg->s_acl_len = new_acl_len;
3176 3176
3177 3177 /*
3178 3178 * This call will only be meaningful if and when the interconnect
3179 3179 * layer makes use of the access list
3180 3180 */
3181 3181 adapter = seg->s_adapter;
3182 3182 /*
3183 3183 * create a acl list with hwaddr for RSMPI publish
3184 3184 */
3185 3185 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3186 3186
3187 3187 if (e != RSM_SUCCESS) {
3188 3188 seg->s_acl = old_acl;
3189 3189 seg->s_acl_len = old_acl_len;
3190 3190 rsmseglock_release(seg);
3191 3191 rsmacl_free(new_acl, new_acl_len);
3192 3192 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3193 3193 "rsm_republish done: rsmpiacl_create failed %d", e));
3194 3194 return (e);
3195 3195 }
3196 3196 rsmpi_old_acl = seg->s_acl_in;
3197 3197 seg->s_acl_in = rsmpi_new_acl;
3198 3198
3199 3199 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3200 3200 seg->s_acl_in, seg->s_acl_len,
3201 3201 RSM_RESOURCE_DONTWAIT, NULL);
3202 3202
3203 3203 if (e != RSM_SUCCESS) {
3204 3204 seg->s_acl = old_acl;
3205 3205 seg->s_acl_in = rsmpi_old_acl;
3206 3206 seg->s_acl_len = old_acl_len;
3207 3207 rsmseglock_release(seg);
3208 3208 rsmacl_free(new_acl, new_acl_len);
3209 3209 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3210 3210
3211 3211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3212 3212 "rsm_republish done: rsmpi republish failed %d\n", e));
3213 3213 return (e);
3214 3214 }
3215 3215
3216 3216 /* create a tmp copy of the new acl */
3217 3217 tmp_acl_len = new_acl_len;
3218 3218 if (tmp_acl_len > 0) {
3219 3219 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3220 3220 for (i = 0; i < tmp_acl_len; i++) {
3221 3221 tmp_acl[i].ae_node = new_acl[i].ae_node;
3222 3222 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3223 3223 }
3224 3224 /*
3225 3225 * The default permission of a node which was in the old
3226 3226 * ACL but not in the new ACL is 0 ie no access.
3227 3227 */
3228 3228 permission = 0;
3229 3229 } else {
3230 3230 /*
3231 3231 * NULL acl means all importers can connect and
3232 3232 * default permission will be owner creation umask
3233 3233 */
3234 3234 tmp_acl = NULL;
3235 3235 permission = seg->s_mode;
3236 3236 }
3237 3237
3238 3238 /* make other republishers to wait for republish to complete */
3239 3239 seg->s_flags |= RSM_REPUBLISH_WAIT;
3240 3240
3241 3241 rsmseglock_release(seg);
3242 3242
3243 3243 /* send the new perms to the importing nodes */
3244 3244 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3245 3245
3246 3246 rsmseglock_acquire(seg);
3247 3247 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3248 3248 /* wake up any one waiting for republish to complete */
3249 3249 cv_broadcast(&seg->s_cv);
3250 3250 rsmseglock_release(seg);
3251 3251
3252 3252 rsmacl_free(tmp_acl, tmp_acl_len);
3253 3253 rsmacl_free(old_acl, old_acl_len);
3254 3254 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3255 3255
3256 3256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3257 3257 return (DDI_SUCCESS);
3258 3258 }
3259 3259
3260 3260 static int
3261 3261 rsm_unpublish(rsmseg_t *seg, int mode)
3262 3262 {
3263 3263 rsmapi_access_entry_t *acl;
3264 3264 rsm_access_entry_t *rsmpi_acl;
3265 3265 int acl_len;
3266 3266 int e;
3267 3267 adapter_t *adapter;
3268 3268 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3269 3269
3270 3270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3271 3271
3272 3272 if (seg->s_pid != ddi_get_pid() &&
3273 3273 ddi_get_pid() != 0) {
3274 3274 DBG_PRINTF((category, RSM_ERR,
3275 3275 "rsm_unpublish: Not creator\n"));
3276 3276 return (RSMERR_NOT_CREATOR);
3277 3277 }
3278 3278
3279 3279 rsmseglock_acquire(seg);
3280 3280 /*
3281 3281 * wait for QUIESCING to complete here before rsmexport_rm
3282 3282 * is called because the SUSPEND_COMPLETE mesg which changes
3283 3283 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3284 3284 * signals the cv_wait needs to find it in the hashtable.
3285 3285 */
3286 3286 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3287 3287 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3288 3288 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3289 3289 rsmseglock_release(seg);
3290 3290 DBG_PRINTF((category, RSM_ERR,
3291 3291 "rsm_unpublish done: cv_wait INTR qscing"
3292 3292 "getv/putv in progress"));
3293 3293 return (RSMERR_INTERRUPTED);
3294 3294 }
3295 3295 }
3296 3296
3297 3297 /* verify segment state */
3298 3298 if ((seg->s_state != RSM_STATE_EXPORT) &&
3299 3299 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3300 3300 rsmseglock_release(seg);
3301 3301 DBG_PRINTF((category, RSM_ERR,
3302 3302 "rsm_unpublish done: bad state %x\n", seg->s_state));
3303 3303 return (RSMERR_SEG_NOT_PUBLISHED);
3304 3304 }
3305 3305
3306 3306 rsmseglock_release(seg);
3307 3307
3308 3308 rsmexport_rm(seg);
3309 3309
3310 3310 rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3311 3311
3312 3312 rsmseglock_acquire(seg);
3313 3313 /*
3314 3314 * wait for republish to complete
3315 3315 */
3316 3316 while ((seg->s_state == RSM_STATE_EXPORT) &&
3317 3317 (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3318 3318 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3319 3319 DBG_PRINTF((category, RSM_ERR,
3320 3320 "rsm_unpublish done: cv_wait INTR repubing"));
3321 3321 rsmseglock_release(seg);
3322 3322 return (RSMERR_INTERRUPTED);
3323 3323 }
3324 3324 }
3325 3325
3326 3326 if ((seg->s_state != RSM_STATE_EXPORT) &&
3327 3327 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3328 3328 DBG_PRINTF((category, RSM_ERR,
3329 3329 "rsm_unpublish done: invalid state"));
3330 3330 rsmseglock_release(seg);
3331 3331 return (RSMERR_SEG_NOT_PUBLISHED);
3332 3332 }
3333 3333
3334 3334 /*
3335 3335 * check for putv/get surrogate segment which was not published
3336 3336 * to the driver.
3337 3337 *
3338 3338 * Be certain to see if there is an ACL first! If this segment was
3339 3339 * not published with an ACL, acl will be a null pointer. Check
3340 3340 * that before dereferencing it.
3341 3341 */
3342 3342 acl = seg->s_acl;
3343 3343 if (acl != (rsmapi_access_entry_t *)NULL) {
3344 3344 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3345 3345 goto bypass;
3346 3346 }
3347 3347
3348 3348 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3349 3349 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3350 3350 goto bypass;
3351 3351
3352 3352 adapter = seg->s_adapter;
3353 3353 for (;;) {
3354 3354 if (seg->s_state != RSM_STATE_EXPORT) {
3355 3355 rsmseglock_release(seg);
3356 3356 DBG_PRINTF((category, RSM_ERR,
3357 3357 "rsm_unpublish done: bad state %x\n",
3358 3358 seg->s_state));
3359 3359 return (RSMERR_SEG_NOT_PUBLISHED);
3360 3360 }
3361 3361
3362 3362 /* unpublish from adapter */
3363 3363 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3364 3364
3365 3365 if (e == RSM_SUCCESS) {
3366 3366 break;
3367 3367 }
3368 3368
3369 3369 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3370 3370 /*
3371 3371 * wait for unpublish to succeed, it's busy.
3372 3372 */
3373 3373 seg->s_flags |= RSM_EXPORT_WAIT;
3374 3374
3375 3375 /* wait for a max of 1 ms - this is an empirical */
3376 3376 /* value that was found by some minimal testing */
3377 3377 /* can be fine tuned when we have better numbers */
3378 3378 /* A long term fix would be to send cv_signal */
3379 3379 /* from the intr callback routine */
3380 3380 /* currently nobody signals this wait */
3381 3381 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3382 3382 drv_usectohz(1000), TR_CLOCK_TICK);
3383 3383
3384 3384 DBG_PRINTF((category, RSM_ERR,
3385 3385 "rsm_unpublish: SEG_IN_USE\n"));
3386 3386
3387 3387 seg->s_flags &= ~RSM_EXPORT_WAIT;
3388 3388 } else {
3389 3389 if (mode == 1) {
3390 3390 DBG_PRINTF((category, RSM_ERR,
3391 3391 "rsm:rsmpi unpublish err %x\n", e));
3392 3392 seg->s_state = RSM_STATE_BIND;
3393 3393 }
3394 3394 rsmseglock_release(seg);
3395 3395 return (e);
3396 3396 }
3397 3397 }
3398 3398
3399 3399 /* Free segment */
3400 3400 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3401 3401
3402 3402 if (e != RSM_SUCCESS) {
3403 3403 DBG_PRINTF((category, RSM_ERR,
3404 3404 "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3405 3405 seg->s_key, e));
3406 3406 }
3407 3407
3408 3408 bypass:
3409 3409 acl = seg->s_acl;
3410 3410 rsmpi_acl = seg->s_acl_in;
3411 3411 acl_len = seg->s_acl_len;
3412 3412
3413 3413 seg->s_acl = NULL;
3414 3414 seg->s_acl_in = NULL;
3415 3415 seg->s_acl_len = 0;
3416 3416
3417 3417 if (seg->s_state == RSM_STATE_EXPORT) {
3418 3418 seg->s_state = RSM_STATE_BIND;
3419 3419 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3420 3420 seg->s_state = RSM_STATE_BIND_QUIESCED;
3421 3421 cv_broadcast(&seg->s_cv);
3422 3422 }
3423 3423
3424 3424 rsmseglock_release(seg);
3425 3425
3426 3426 rsmacl_free(acl, acl_len);
3427 3427 rsmpiacl_free(rsmpi_acl, acl_len);
3428 3428
3429 3429 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3430 3430
3431 3431 return (DDI_SUCCESS);
3432 3432 }
3433 3433
3434 3434 /*
3435 3435 * Called from rsm_unpublish to force an unload and disconnection of all
3436 3436 * importers of the unpublished segment.
3437 3437 *
3438 3438 * First build the list of segments requiring a force disconnect, then
3439 3439 * send a request for each.
3440 3440 */
3441 3441 static void
3442 3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3443 3443 rsm_node_id_t ex_nodeid)
3444 3444 {
3445 3445 rsmipc_request_t request;
3446 3446 importing_token_t *prev_token, *token, *tmp_token, *tokp;
3447 3447 importing_token_t *force_disconnect_list = NULL;
3448 3448 int index;
3449 3449
3450 3450 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3451 3451 "rsm_send_importer_disconnects enter\n"));
3452 3452
3453 3453 index = rsmhash(ex_segid);
3454 3454
3455 3455 mutex_enter(&importer_list.lock);
3456 3456
3457 3457 prev_token = NULL;
3458 3458 token = importer_list.bucket[index];
3459 3459
3460 3460 while (token != NULL) {
3461 3461 if (token->key == ex_segid) {
3462 3462 /*
3463 3463 * take it off the importer list and add it
3464 3464 * to the force disconnect list.
3465 3465 */
3466 3466 if (prev_token == NULL)
3467 3467 importer_list.bucket[index] = token->next;
3468 3468 else
3469 3469 prev_token->next = token->next;
3470 3470 tmp_token = token;
3471 3471 token = token->next;
3472 3472 if (force_disconnect_list == NULL) {
3473 3473 force_disconnect_list = tmp_token;
3474 3474 tmp_token->next = NULL;
3475 3475 } else {
3476 3476 tokp = force_disconnect_list;
3477 3477 /*
3478 3478 * make sure that the tmp_token's node
3479 3479 * is not already on the force disconnect
3480 3480 * list.
3481 3481 */
3482 3482 while (tokp != NULL) {
3483 3483 if (tokp->importing_node ==
3484 3484 tmp_token->importing_node) {
3485 3485 break;
3486 3486 }
3487 3487 tokp = tokp->next;
3488 3488 }
3489 3489 if (tokp == NULL) {
3490 3490 tmp_token->next =
3491 3491 force_disconnect_list;
3492 3492 force_disconnect_list = tmp_token;
3493 3493 } else {
3494 3494 kmem_free((void *)tmp_token,
3495 3495 sizeof (*token));
3496 3496 }
3497 3497 }
3498 3498
3499 3499 } else {
3500 3500 prev_token = token;
3501 3501 token = token->next;
3502 3502 }
3503 3503 }
3504 3504 mutex_exit(&importer_list.lock);
3505 3505
3506 3506 token = force_disconnect_list;
3507 3507 while (token != NULL) {
3508 3508 if (token->importing_node == my_nodeid) {
3509 3509 rsm_force_unload(ex_nodeid, ex_segid,
3510 3510 DISCONNECT);
3511 3511 } else {
3512 3512 request.rsmipc_hdr.rsmipc_type =
3513 3513 RSMIPC_MSG_DISCONNECT;
3514 3514 request.rsmipc_key = token->key;
3515 3515 for (;;) {
3516 3516 if (rsmipc_send(token->importing_node,
3517 3517 &request,
3518 3518 RSM_NO_REPLY) == RSM_SUCCESS) {
3519 3519 break;
3520 3520 } else {
3521 3521 delay(drv_usectohz(10000));
3522 3522 }
3523 3523 }
3524 3524 }
3525 3525 tmp_token = token;
3526 3526 token = token->next;
3527 3527 kmem_free((void *)tmp_token, sizeof (*token));
3528 3528 }
3529 3529
3530 3530 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3531 3531 "rsm_send_importer_disconnects done\n"));
3532 3532 }
3533 3533
3534 3534 /*
3535 3535 * This function is used as a callback for unlocking the pages locked
3536 3536 * down by a process which then does a fork or an exec.
3537 3537 * It marks the export segments corresponding to umem cookie given by
3538 3538 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3539 3539 * destroyed later when an rsm_close occurs).
3540 3540 */
3541 3541 static void
3542 3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3543 3543 {
3544 3544 rsmresource_blk_t *blk;
3545 3545 rsmresource_t *p;
3546 3546 rsmseg_t *eseg = NULL;
3547 3547 int i, j;
3548 3548 int found = 0;
3549 3549
3550 3550 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3551 3551 "rsm_export_force_destroy enter\n"));
3552 3552
3553 3553 /*
3554 3554 * Walk the resource list and locate the export segment (either
3555 3555 * in the BIND or the EXPORT state) which corresponds to the
3556 3556 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3557 3557 * Change the state to ZOMBIE by calling rsmseg_close with the
3558 3558 * force_flag argument (the second argument) set to 1. Also,
3559 3559 * unpublish and unbind the segment, but don't free it. Free it
3560 3560 * only on a rsm_close call for the segment.
3561 3561 */
3562 3562 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3563 3563
3564 3564 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3565 3565 blk = rsm_resource.rsmrc_root[i];
3566 3566 if (blk == NULL) {
3567 3567 continue;
3568 3568 }
3569 3569
3570 3570 for (j = 0; j < RSMRC_BLKSZ; j++) {
3571 3571 p = blk->rsmrcblk_blks[j];
3572 3572 if ((p != NULL) && (p != RSMRC_RESERVED) &&
3573 3573 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3574 3574 eseg = (rsmseg_t *)p;
3575 3575 if (eseg->s_cookie != ck)
3576 3576 continue; /* continue searching */
3577 3577 /*
3578 3578 * Found the segment, set flag to indicate
3579 3579 * force destroy processing is in progress
3580 3580 */
3581 3581 rsmseglock_acquire(eseg);
3582 3582 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3583 3583 rsmseglock_release(eseg);
3584 3584 found = 1;
3585 3585 break;
3586 3586 }
3587 3587 }
3588 3588
3589 3589 if (found)
3590 3590 break;
3591 3591 }
3592 3592
3593 3593 rw_exit(&rsm_resource.rsmrc_lock);
3594 3594
3595 3595 if (found) {
3596 3596 ASSERT(eseg != NULL);
3597 3597 /* call rsmseg_close with force flag set to 1 */
3598 3598 rsmseg_close(eseg, 1);
3599 3599 /*
3600 3600 * force destroy processing done, clear flag and signal any
3601 3601 * thread waiting in rsmseg_close.
3602 3602 */
3603 3603 rsmseglock_acquire(eseg);
3604 3604 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3605 3605 cv_broadcast(&eseg->s_cv);
3606 3606 rsmseglock_release(eseg);
3607 3607 }
3608 3608
3609 3609 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3610 3610 "rsm_export_force_destroy done\n"));
3611 3611 }
3612 3612
3613 3613 /* ******************************* Remote Calls *********************** */
3614 3614 static void
3615 3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3616 3616 {
3617 3617 rsmipc_reply_t reply;
3618 3618 DBG_DEFINE(category,
3619 3619 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3620 3620
3621 3621 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3622 3622 "rsm_intr_segconnect enter\n"));
3623 3623
3624 3624 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3625 3625
3626 3626 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3627 3627 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3628 3628
3629 3629 (void) rsmipc_send(src, NULL, &reply);
3630 3630
3631 3631 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3632 3632 "rsm_intr_segconnect done\n"));
3633 3633 }
3634 3634
3635 3635
3636 3636 /*
3637 3637 * When an exported segment is unpublished the exporter sends an ipc
3638 3638 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher
3639 3639 * calls this function. The import list is scanned; segments which match the
3640 3640 * exported segment id are unloaded and disconnected.
3641 3641 *
3642 3642 * Will also be called from rsm_rebind with disconnect_flag FALSE.
3643 3643 *
3644 3644 */
3645 3645 static void
3646 3646 rsm_force_unload(rsm_node_id_t src_nodeid,
3647 3647 rsm_memseg_id_t ex_segid,
3648 3648 boolean_t disconnect_flag)
3649 3649
3650 3650 {
3651 3651 rsmresource_t *p = NULL;
3652 3652 rsmhash_table_t *rhash = &rsm_import_segs;
3653 3653 uint_t index;
3654 3654 DBG_DEFINE(category,
3655 3655 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3656 3656
3657 3657 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3658 3658
3659 3659 index = rsmhash(ex_segid);
3660 3660
3661 3661 rw_enter(&rhash->rsmhash_rw, RW_READER);
3662 3662
3663 3663 p = rsmhash_getbkt(rhash, index);
3664 3664
3665 3665 for (; p; p = p->rsmrc_next) {
3666 3666 rsmseg_t *seg = (rsmseg_t *)p;
3667 3667 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3668 3668 /*
3669 3669 * In order to make rsmseg_unload and rsm_force_unload
3670 3670 * thread safe, acquire the segment lock here.
3671 3671 * rsmseg_unload is responsible for releasing the lock.
3672 3672 * rsmseg_unload releases the lock just before a call
3673 3673 * to rsmipc_send or in case of an early exit which
3674 3674 * occurs if the segment was in the state
3675 3675 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3676 3676 */
3677 3677 rsmseglock_acquire(seg);
3678 3678 if (disconnect_flag)
3679 3679 seg->s_flags |= RSM_FORCE_DISCONNECT;
3680 3680 rsmseg_unload(seg);
3681 3681 }
3682 3682 }
3683 3683 rw_exit(&rhash->rsmhash_rw);
3684 3684
3685 3685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3686 3686 }
3687 3687
3688 3688 static void
3689 3689 rsm_intr_reply(rsmipc_msghdr_t *msg)
3690 3690 {
3691 3691 /*
3692 3692 * Find slot for cookie in reply.
3693 3693 * Match sequence with sequence in cookie
3694 3694 * If no match; return
3695 3695 * Try to grap lock of slot, if locked return
3696 3696 * copy data into reply slot area
3697 3697 * signal waiter
3698 3698 */
3699 3699 rsmipc_slot_t *slot;
3700 3700 rsmipc_cookie_t *cookie;
3701 3701 void *data = (void *) msg;
3702 3702 size_t size = sizeof (rsmipc_reply_t);
3703 3703 DBG_DEFINE(category,
3704 3704 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3705 3705
3706 3706 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3707 3707
3708 3708 cookie = &msg->rsmipc_cookie;
3709 3709 if (cookie->ic.index >= RSMIPC_SZ) {
3710 3710 DBG_PRINTF((category, RSM_ERR,
3711 3711 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3712 3712 return;
3713 3713 }
3714 3714
3715 3715 ASSERT(cookie->ic.index < RSMIPC_SZ);
3716 3716 slot = &rsm_ipc.slots[cookie->ic.index];
3717 3717 mutex_enter(&slot->rsmipc_lock);
3718 3718 if (slot->rsmipc_cookie.value == cookie->value) {
3719 3719 /* found a match */
3720 3720 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3721 3721 bcopy(data, slot->rsmipc_data, size);
3722 3722 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3723 3723 cv_signal(&slot->rsmipc_cv);
3724 3724 }
3725 3725 } else {
3726 3726 DBG_PRINTF((category, RSM_DEBUG,
3727 3727 "rsm: rsm_intr_reply mismatched reply %d\n",
3728 3728 cookie->ic.index));
3729 3729 }
3730 3730 mutex_exit(&slot->rsmipc_lock);
3731 3731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3732 3732 }
3733 3733
3734 3734 /*
3735 3735 * This function gets dispatched on the worker thread when we receive
3736 3736 * the SQREADY message. This function sends the SQREADY_ACK message.
3737 3737 */
3738 3738 static void
3739 3739 rsm_sqready_ack_deferred(void *arg)
3740 3740 {
3741 3741 path_t *path = (path_t *)arg;
3742 3742 DBG_DEFINE(category,
3743 3743 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3744 3744
3745 3745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3746 3746 "rsm_sqready_ack_deferred enter\n"));
3747 3747
3748 3748 mutex_enter(&path->mutex);
3749 3749
3750 3750 /*
3751 3751 * If path is not active no point in sending the ACK
3752 3752 * because the whole SQREADY protocol will again start
3753 3753 * when the path becomes active.
3754 3754 */
3755 3755 if (path->state != RSMKA_PATH_ACTIVE) {
3756 3756 /*
3757 3757 * decrement the path refcnt incremented in rsm_proc_sqready
3758 3758 */
3759 3759 PATH_RELE_NOLOCK(path);
3760 3760 mutex_exit(&path->mutex);
3761 3761 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3762 3762 "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3763 3763 return;
3764 3764 }
3765 3765
3766 3766 /* send an SQREADY_ACK message */
3767 3767 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3768 3768
3769 3769 /* initialize credits to the max level */
3770 3770 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3771 3771
3772 3772 /* wake up any send that is waiting for credits */
3773 3773 cv_broadcast(&path->sendq_token.sendq_cv);
3774 3774
3775 3775 /*
3776 3776 * decrement the path refcnt since we incremented it in
3777 3777 * rsm_proc_sqready
3778 3778 */
3779 3779 PATH_RELE_NOLOCK(path);
3780 3780
3781 3781 mutex_exit(&path->mutex);
3782 3782
3783 3783 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3784 3784 "rsm_sqready_ack_deferred done\n"));
3785 3785 }
3786 3786
3787 3787 /*
3788 3788 * Process the SQREADY message
3789 3789 */
3790 3790 static void
3791 3791 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3792 3792 rsm_intr_hand_arg_t arg)
3793 3793 {
3794 3794 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3795 3795 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3796 3796 path_t *path;
3797 3797 DBG_DEFINE(category,
3798 3798 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3799 3799
3800 3800 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3801 3801
3802 3802 /* look up the path - incr the path refcnt */
3803 3803 path = rsm_find_path(hdlr_argp->adapter_name,
3804 3804 hdlr_argp->adapter_instance, src_hwaddr);
3805 3805
3806 3806 /*
3807 3807 * No path exists or path is not active - drop the message
3808 3808 */
3809 3809 if (path == NULL) {
3810 3810 DBG_PRINTF((category, RSM_DEBUG,
3811 3811 "rsm_proc_sqready done: msg dropped no path\n"));
3812 3812 return;
3813 3813 }
3814 3814
3815 3815 mutex_exit(&path->mutex);
3816 3816
3817 3817 /* drain any tasks from the previous incarnation */
3818 3818 taskq_wait(path->recv_taskq);
3819 3819
3820 3820 mutex_enter(&path->mutex);
3821 3821 /*
3822 3822 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3823 3823 * in the meanwhile we received an SQREADY message, blindly reset
3824 3824 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3825 3825 * and forget about the SQREADY that we sent.
3826 3826 */
3827 3827 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3828 3828
3829 3829 if (path->state != RSMKA_PATH_ACTIVE) {
3830 3830 /* decr refcnt and drop the mutex */
3831 3831 PATH_RELE_NOLOCK(path);
3832 3832 mutex_exit(&path->mutex);
3833 3833 DBG_PRINTF((category, RSM_DEBUG,
3834 3834 "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3835 3835 return;
3836 3836 }
3837 3837
3838 3838 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3839 3839 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3840 3840
3841 3841 /*
3842 3842 * The sender's local incarnation number is our remote incarnation
3843 3843 * number save it in the path data structure
3844 3844 */
3845 3845 path->remote_incn = msg->rsmipc_local_incn;
3846 3846 path->sendq_token.msgbuf_avail = 0;
3847 3847 path->procmsg_cnt = 0;
3848 3848
3849 3849 /*
3850 3850 * path is active - dispatch task to send SQREADY_ACK - remember
3851 3851 * RSMPI calls can't be done in interrupt context
3852 3852 *
3853 3853 * We can use the recv_taskq to send because the remote endpoint
3854 3854 * cannot start sending messages till it receives SQREADY_ACK hence
3855 3855 * at this point there are no tasks on recv_taskq.
3856 3856 *
3857 3857 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3858 3858 */
3859 3859 (void) taskq_dispatch(path->recv_taskq,
3860 3860 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3861 3861
3862 3862 mutex_exit(&path->mutex);
3863 3863
3864 3864
3865 3865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3866 3866 }
3867 3867
3868 3868 /*
3869 3869 * Process the SQREADY_ACK message
3870 3870 */
3871 3871 static void
3872 3872 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3873 3873 rsm_intr_hand_arg_t arg)
3874 3874 {
3875 3875 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3876 3876 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3877 3877 path_t *path;
3878 3878 DBG_DEFINE(category,
3879 3879 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3880 3880
3881 3881 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3882 3882 "rsm_proc_sqready_ack enter\n"));
3883 3883
3884 3884 /* look up the path - incr the path refcnt */
3885 3885 path = rsm_find_path(hdlr_argp->adapter_name,
3886 3886 hdlr_argp->adapter_instance, src_hwaddr);
3887 3887
3888 3888 /*
3889 3889 * drop the message if - no path exists or path is not active
3890 3890 * or if its not waiting for SQREADY_ACK message
3891 3891 */
3892 3892 if (path == NULL) {
3893 3893 DBG_PRINTF((category, RSM_DEBUG,
3894 3894 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3895 3895 return;
3896 3896 }
3897 3897
3898 3898 if ((path->state != RSMKA_PATH_ACTIVE) ||
3899 3899 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3900 3900 /* decrement the refcnt */
3901 3901 PATH_RELE_NOLOCK(path);
3902 3902 mutex_exit(&path->mutex);
3903 3903 DBG_PRINTF((category, RSM_DEBUG,
3904 3904 "rsm_proc_sqready_ack done: msg dropped\n"));
3905 3905 return;
3906 3906 }
3907 3907
3908 3908 /*
3909 3909 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3910 3910 * sent, if not drop it.
3911 3911 */
3912 3912 if (path->local_incn != msghdr->rsmipc_incn) {
3913 3913 /* decrement the refcnt */
3914 3914 PATH_RELE_NOLOCK(path);
3915 3915 mutex_exit(&path->mutex);
3916 3916 DBG_PRINTF((category, RSM_DEBUG,
3917 3917 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3918 3918 msghdr->rsmipc_incn));
3919 3919 return;
3920 3920 }
3921 3921
3922 3922 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3923 3923 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3924 3924
3925 3925 /*
3926 3926 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3927 3927 */
3928 3928 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3929 3929
3930 3930 /* save the remote sendq incn number */
3931 3931 path->remote_incn = msg->rsmipc_local_incn;
3932 3932
3933 3933 /* initialize credits to the max level */
3934 3934 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3935 3935
3936 3936 /* wake up any send that is waiting for credits */
3937 3937 cv_broadcast(&path->sendq_token.sendq_cv);
3938 3938
3939 3939 /* decrement the refcnt */
3940 3940 PATH_RELE_NOLOCK(path);
3941 3941
3942 3942 mutex_exit(&path->mutex);
3943 3943
3944 3944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3945 3945 "rsm_proc_sqready_ack done\n"));
3946 3946 }
3947 3947
3948 3948 /*
3949 3949 * process the RSMIPC_MSG_CREDIT message
3950 3950 */
3951 3951 static void
3952 3952 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3953 3953 rsm_intr_hand_arg_t arg)
3954 3954 {
3955 3955 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3956 3956 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3957 3957 path_t *path;
3958 3958 DBG_DEFINE(category,
3959 3959 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3960 3960 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3961 3961
3962 3962 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3963 3963
3964 3964 /* look up the path - incr the path refcnt */
3965 3965 path = rsm_find_path(hdlr_argp->adapter_name,
3966 3966 hdlr_argp->adapter_instance, src_hwaddr);
3967 3967
3968 3968 if (path == NULL) {
3969 3969 DBG_PRINTF((category, RSM_DEBUG,
3970 3970 "rsm_add_credits enter: path not found\n"));
3971 3971 return;
3972 3972 }
3973 3973
3974 3974 /* the path is not active - discard credits */
3975 3975 if (path->state != RSMKA_PATH_ACTIVE) {
3976 3976 PATH_RELE_NOLOCK(path);
3977 3977 mutex_exit(&path->mutex);
3978 3978 DBG_PRINTF((category, RSM_DEBUG,
3979 3979 "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3980 3980 return;
3981 3981 }
3982 3982
3983 3983 /*
3984 3984 * Check if these credits are for current incarnation of the path.
3985 3985 */
3986 3986 if (path->local_incn != msghdr->rsmipc_incn) {
3987 3987 /* decrement the refcnt */
3988 3988 PATH_RELE_NOLOCK(path);
3989 3989 mutex_exit(&path->mutex);
3990 3990 DBG_PRINTF((category, RSM_DEBUG,
3991 3991 "rsm_add_credits enter: old incn %lld\n",
3992 3992 msghdr->rsmipc_incn));
3993 3993 return;
3994 3994 }
3995 3995
3996 3996 DBG_PRINTF((category, RSM_DEBUG,
3997 3997 "rsm_add_credits:path=%lx new-creds=%d "
3998 3998 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3999 3999 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4000 4000 src_hwaddr));
4001 4001
4002 4002
4003 4003 /* add credits to the path's sendq */
4004 4004 path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4005 4005
4006 4006 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4007 4007
4008 4008 /* wake up any send that is waiting for credits */
4009 4009 cv_broadcast(&path->sendq_token.sendq_cv);
4010 4010
4011 4011 /* decrement the refcnt */
4012 4012 PATH_RELE_NOLOCK(path);
4013 4013
4014 4014 mutex_exit(&path->mutex);
4015 4015
4016 4016 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4017 4017 }
4018 4018
4019 4019 static void
4020 4020 rsm_intr_event(rsmipc_request_t *msg)
4021 4021 {
4022 4022 rsmseg_t *seg;
4023 4023 rsmresource_t *p;
4024 4024 rsm_node_id_t src_node;
4025 4025 DBG_DEFINE(category,
4026 4026 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4027 4027
4028 4028 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4029 4029
4030 4030 src_node = msg->rsmipc_hdr.rsmipc_src;
4031 4031
4032 4032 if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4033 4033 /* This is for an import segment */
4034 4034 uint_t hashval = rsmhash(msg->rsmipc_key);
4035 4035
↓ open down ↓ |
4035 lines elided |
↑ open up ↑ |
4036 4036 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4037 4037
4038 4038 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4039 4039
4040 4040 for (; p; p = p->rsmrc_next) {
4041 4041 if ((p->rsmrc_key == msg->rsmipc_key) &&
4042 4042 (p->rsmrc_node == src_node)) {
4043 4043 seg = (rsmseg_t *)p;
4044 4044 rsmseglock_acquire(seg);
4045 4045
4046 - atomic_add_32(&seg->s_pollevent, 1);
4046 + atomic_inc_32(&seg->s_pollevent);
4047 4047
4048 4048 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4049 4049 pollwakeup(&seg->s_poll, POLLRDNORM);
4050 4050
4051 4051 rsmseglock_release(seg);
4052 4052 }
4053 4053 }
4054 4054
4055 4055 rw_exit(&rsm_import_segs.rsmhash_rw);
4056 4056 } else {
4057 4057 /* This is for an export segment */
4058 4058 seg = rsmexport_lookup(msg->rsmipc_key);
4059 4059 if (!seg) {
4060 4060 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4061 4061 "rsm_intr_event done: exp seg not found\n"));
4062 4062 return;
4063 4063 }
4064 4064
4065 4065 ASSERT(rsmseglock_held(seg));
4066 4066
4067 - atomic_add_32(&seg->s_pollevent, 1);
4067 + atomic_inc_32(&seg->s_pollevent);
4068 4068
4069 4069 /*
4070 4070 * We must hold the segment lock here, or else the segment
4071 4071 * can be freed while pollwakeup is using it. This implies
4072 4072 * that we MUST NOT grab the segment lock during rsm_chpoll,
4073 4073 * as outlined in the chpoll(2) man page.
4074 4074 */
4075 4075 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4076 4076 pollwakeup(&seg->s_poll, POLLRDNORM);
4077 4077
4078 4078 rsmseglock_release(seg);
4079 4079 }
4080 4080
4081 4081 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4082 4082 }
4083 4083
4084 4084 /*
4085 4085 * The exporter did a republish and changed the ACL - this change is only
4086 4086 * visible to new importers.
4087 4087 */
4088 4088 static void
4089 4089 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4090 4090 rsm_permission_t perm)
4091 4091 {
4092 4092
4093 4093 rsmresource_t *p;
4094 4094 rsmseg_t *seg;
4095 4095 uint_t hashval = rsmhash(key);
4096 4096 DBG_DEFINE(category,
4097 4097 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4098 4098
4099 4099 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4100 4100
4101 4101 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4102 4102
4103 4103 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4104 4104
4105 4105 for (; p; p = p->rsmrc_next) {
4106 4106 /*
4107 4107 * find the importer and update the permission in the shared
4108 4108 * data structure. Any new importers will use the new perms
4109 4109 */
4110 4110 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4111 4111 seg = (rsmseg_t *)p;
4112 4112
4113 4113 rsmseglock_acquire(seg);
4114 4114 rsmsharelock_acquire(seg);
4115 4115 seg->s_share->rsmsi_mode = perm;
4116 4116 rsmsharelock_release(seg);
4117 4117 rsmseglock_release(seg);
4118 4118
4119 4119 break;
4120 4120 }
4121 4121 }
4122 4122
4123 4123 rw_exit(&rsm_import_segs.rsmhash_rw);
4124 4124
4125 4125 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4126 4126 }
4127 4127
4128 4128 void
4129 4129 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4130 4130 {
4131 4131 int done = 1; /* indicate all SUSPENDS have been acked */
4132 4132 list_element_t *elem;
4133 4133 DBG_DEFINE(category,
4134 4134 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4135 4135
4136 4136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4137 4137 "rsm_suspend_complete enter\n"));
4138 4138
4139 4139 mutex_enter(&rsm_suspend_list.list_lock);
4140 4140
4141 4141 if (rsm_suspend_list.list_head == NULL) {
4142 4142 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4143 4143 "rsm_suspend_complete done: suspend_list is empty\n"));
4144 4144 mutex_exit(&rsm_suspend_list.list_lock);
4145 4145 return;
4146 4146 }
4147 4147
4148 4148 elem = rsm_suspend_list.list_head;
4149 4149 while (elem != NULL) {
4150 4150 if (elem->nodeid == src_node) {
4151 4151 /* clear the pending flag for the node */
4152 4152 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4153 4153 elem->flags |= flag;
4154 4154 }
4155 4155
4156 4156 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4157 4157 done = 0; /* still some nodes have not yet ACKED */
4158 4158
4159 4159 elem = elem->next;
4160 4160 }
4161 4161
4162 4162 mutex_exit(&rsm_suspend_list.list_lock);
4163 4163
4164 4164 if (!done) {
4165 4165 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4166 4166 "rsm_suspend_complete done: acks pending\n"));
4167 4167 return;
4168 4168 }
4169 4169 /*
4170 4170 * Now that we are done with suspending all the remote importers
4171 4171 * time to quiesce the local exporters
4172 4172 */
4173 4173 exporter_quiesce();
4174 4174
4175 4175 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4176 4176 "rsm_suspend_complete done\n"));
4177 4177 }
4178 4178
4179 4179 static void
4180 4180 exporter_quiesce()
4181 4181 {
4182 4182 int i, e;
4183 4183 rsmresource_t *current;
4184 4184 rsmseg_t *seg;
4185 4185 adapter_t *adapter;
4186 4186 DBG_DEFINE(category,
4187 4187 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4188 4188
4189 4189 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4190 4190 /*
4191 4191 * The importers send a SUSPEND_COMPLETE to the exporter node
4192 4192 * Unpublish, unbind the export segment and
4193 4193 * move the segments to the EXPORT_QUIESCED state
4194 4194 */
4195 4195
4196 4196 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4197 4197
4198 4198 for (i = 0; i < rsm_hash_size; i++) {
4199 4199 current = rsm_export_segs.bucket[i];
4200 4200 while (current != NULL) {
4201 4201 seg = (rsmseg_t *)current;
4202 4202 rsmseglock_acquire(seg);
4203 4203 if (current->rsmrc_state ==
4204 4204 RSM_STATE_EXPORT_QUIESCING) {
4205 4205 adapter = seg->s_adapter;
4206 4206 /*
4207 4207 * some local memory handles are not published
4208 4208 * check if it was published
4209 4209 */
4210 4210 if ((seg->s_acl == NULL) ||
4211 4211 (seg->s_acl[0].ae_node != my_nodeid) ||
4212 4212 (seg->s_acl[0].ae_permission != 0)) {
4213 4213
4214 4214 e = adapter->rsmpi_ops->rsm_unpublish(
4215 4215 seg->s_handle.out);
4216 4216 DBG_PRINTF((category, RSM_DEBUG,
4217 4217 "exporter_quiesce:unpub %d\n", e));
4218 4218
4219 4219 e = adapter->rsmpi_ops->rsm_seg_destroy(
4220 4220 seg->s_handle.out);
4221 4221
4222 4222 DBG_PRINTF((category, RSM_DEBUG,
4223 4223 "exporter_quiesce:destroy %d\n",
4224 4224 e));
4225 4225 }
4226 4226
4227 4227 (void) rsm_unbind_pages(seg);
4228 4228 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4229 4229 cv_broadcast(&seg->s_cv);
4230 4230 }
4231 4231 rsmseglock_release(seg);
4232 4232 current = current->rsmrc_next;
4233 4233 }
4234 4234 }
4235 4235 rw_exit(&rsm_export_segs.rsmhash_rw);
4236 4236
4237 4237 /*
4238 4238 * All the local segments we are done with the pre-del processing
4239 4239 * - time to move to PREDEL_COMPLETED.
4240 4240 */
4241 4241
4242 4242 mutex_enter(&rsm_drv_data.drv_lock);
4243 4243
4244 4244 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4245 4245
4246 4246 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4247 4247
4248 4248 cv_broadcast(&rsm_drv_data.drv_cv);
4249 4249
4250 4250 mutex_exit(&rsm_drv_data.drv_lock);
4251 4251
4252 4252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4253 4253 }
4254 4254
4255 4255 static void
4256 4256 importer_suspend(rsm_node_id_t src_node)
4257 4257 {
4258 4258 int i;
4259 4259 int susp_flg; /* true means already suspended */
4260 4260 int num_importers;
4261 4261 rsmresource_t *p = NULL, *curp;
4262 4262 rsmhash_table_t *rhash = &rsm_import_segs;
4263 4263 rsmseg_t *seg;
4264 4264 rsmipc_request_t request;
4265 4265 DBG_DEFINE(category,
4266 4266 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4267 4267
4268 4268 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4269 4269
4270 4270 rw_enter(&rhash->rsmhash_rw, RW_READER);
4271 4271 for (i = 0; i < rsm_hash_size; i++) {
4272 4272 p = rhash->bucket[i];
4273 4273
4274 4274 /*
4275 4275 * Suspend all importers with same <node, key> pair.
4276 4276 * After the last one of the shared importers has been
4277 4277 * suspended - suspend the shared mappings/connection.
4278 4278 */
4279 4279 for (; p; p = p->rsmrc_next) {
4280 4280 rsmseg_t *first = (rsmseg_t *)p;
4281 4281 if ((first->s_node != src_node) ||
4282 4282 (first->s_state == RSM_STATE_DISCONNECT))
4283 4283 continue; /* go to next entry */
4284 4284 /*
4285 4285 * search the rest of the bucket for
4286 4286 * other siblings (imprtrs with the same key)
4287 4287 * of "first" and suspend them.
4288 4288 * All importers with same key fall in
4289 4289 * the same bucket.
4290 4290 */
4291 4291 num_importers = 0;
4292 4292 for (curp = p; curp; curp = curp->rsmrc_next) {
4293 4293 seg = (rsmseg_t *)curp;
4294 4294
4295 4295 rsmseglock_acquire(seg);
4296 4296
4297 4297 if ((seg->s_node != first->s_node) ||
4298 4298 (seg->s_key != first->s_key) ||
4299 4299 (seg->s_state == RSM_STATE_DISCONNECT)) {
4300 4300 /*
4301 4301 * either not a peer segment or its a
4302 4302 * disconnected segment - skip it
4303 4303 */
4304 4304 rsmseglock_release(seg);
4305 4305 continue;
4306 4306 }
4307 4307
4308 4308 rsmseg_suspend(seg, &susp_flg);
4309 4309
4310 4310 if (susp_flg) { /* seg already suspended */
4311 4311 rsmseglock_release(seg);
4312 4312 break; /* the inner for loop */
4313 4313 }
4314 4314
4315 4315 num_importers++;
4316 4316 rsmsharelock_acquire(seg);
4317 4317 /*
4318 4318 * we've processed all importers that are
4319 4319 * siblings of "first"
4320 4320 */
4321 4321 if (num_importers ==
4322 4322 seg->s_share->rsmsi_refcnt) {
4323 4323 rsmsharelock_release(seg);
4324 4324 rsmseglock_release(seg);
4325 4325 break;
4326 4326 }
4327 4327 rsmsharelock_release(seg);
4328 4328 rsmseglock_release(seg);
4329 4329 }
4330 4330
4331 4331 /*
4332 4332 * All the importers with the same key and
4333 4333 * nodeid as "first" have been suspended.
4334 4334 * Now suspend the shared connect/mapping.
4335 4335 * This is done only once.
4336 4336 */
4337 4337 if (!susp_flg) {
4338 4338 rsmsegshare_suspend(seg);
4339 4339 }
4340 4340 }
4341 4341 }
4342 4342
4343 4343 rw_exit(&rhash->rsmhash_rw);
4344 4344
4345 4345 /* send an ACK for SUSPEND message */
4346 4346 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4347 4347 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4348 4348
4349 4349
4350 4350 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4351 4351
4352 4352 }
4353 4353
4354 4354 static void
4355 4355 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4356 4356 {
4357 4357 int recheck_state;
4358 4358 rsmcookie_t *hdl;
4359 4359 DBG_DEFINE(category,
4360 4360 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4361 4361
4362 4362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4363 4363 "rsmseg_suspend enter: key=%u\n", seg->s_key));
4364 4364
4365 4365 *susp_flg = 0;
4366 4366
4367 4367 ASSERT(rsmseglock_held(seg));
4368 4368 /* wait if putv/getv is in progress */
4369 4369 while (seg->s_rdmacnt > 0)
4370 4370 cv_wait(&seg->s_cv, &seg->s_lock);
4371 4371
4372 4372 do {
4373 4373 recheck_state = 0;
4374 4374
4375 4375 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4376 4376 "rsmseg_suspend:segment %x state=%d\n",
4377 4377 seg->s_key, seg->s_state));
4378 4378
4379 4379 switch (seg->s_state) {
4380 4380 case RSM_STATE_NEW:
4381 4381 /* not a valid state */
4382 4382 break;
4383 4383 case RSM_STATE_CONNECTING:
4384 4384 seg->s_state = RSM_STATE_ABORT_CONNECT;
4385 4385 break;
4386 4386 case RSM_STATE_ABORT_CONNECT:
4387 4387 break;
4388 4388 case RSM_STATE_CONNECT:
4389 4389 seg->s_handle.in = NULL;
4390 4390 seg->s_state = RSM_STATE_CONN_QUIESCE;
4391 4391 break;
4392 4392 case RSM_STATE_MAPPING:
4393 4393 /* wait until segment leaves the mapping state */
4394 4394 while (seg->s_state == RSM_STATE_MAPPING)
4395 4395 cv_wait(&seg->s_cv, &seg->s_lock);
4396 4396 recheck_state = 1;
4397 4397 break;
4398 4398 case RSM_STATE_ACTIVE:
4399 4399 /* unload the mappings */
4400 4400 if (seg->s_ckl != NULL) {
4401 4401 hdl = seg->s_ckl;
4402 4402 for (; hdl != NULL; hdl = hdl->c_next) {
4403 4403 (void) devmap_unload(hdl->c_dhp,
4404 4404 hdl->c_off, hdl->c_len);
4405 4405 }
4406 4406 }
4407 4407 seg->s_mapinfo = NULL;
4408 4408 seg->s_state = RSM_STATE_MAP_QUIESCE;
4409 4409 break;
4410 4410 case RSM_STATE_CONN_QUIESCE:
4411 4411 /* FALLTHRU */
4412 4412 case RSM_STATE_MAP_QUIESCE:
4413 4413 /* rsmseg_suspend already done for seg */
4414 4414 *susp_flg = 1;
4415 4415 break;
4416 4416 case RSM_STATE_DISCONNECT:
4417 4417 break;
4418 4418 default:
4419 4419 ASSERT(0); /* invalid state */
4420 4420 }
4421 4421 } while (recheck_state);
4422 4422
4423 4423 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4424 4424 }
4425 4425
4426 4426 static void
4427 4427 rsmsegshare_suspend(rsmseg_t *seg)
4428 4428 {
4429 4429 int e;
4430 4430 adapter_t *adapter;
4431 4431 rsm_import_share_t *sharedp;
4432 4432 DBG_DEFINE(category,
4433 4433 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4434 4434
4435 4435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4436 4436 "rsmsegshare_suspend enter\n"));
4437 4437
4438 4438 rsmseglock_acquire(seg);
4439 4439 rsmsharelock_acquire(seg);
4440 4440
4441 4441 sharedp = seg->s_share;
4442 4442 adapter = seg->s_adapter;
4443 4443 switch (sharedp->rsmsi_state) {
4444 4444 case RSMSI_STATE_NEW:
4445 4445 break;
4446 4446 case RSMSI_STATE_CONNECTING:
4447 4447 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4448 4448 break;
4449 4449 case RSMSI_STATE_ABORT_CONNECT:
4450 4450 break;
4451 4451 case RSMSI_STATE_CONNECTED:
4452 4452 /* do the rsmpi disconnect */
4453 4453 if (sharedp->rsmsi_node != my_nodeid) {
4454 4454 e = adapter->rsmpi_ops->
4455 4455 rsm_disconnect(sharedp->rsmsi_handle);
4456 4456
4457 4457 DBG_PRINTF((category, RSM_DEBUG,
4458 4458 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4459 4459 sharedp->rsmsi_segid, e));
4460 4460 }
4461 4461
4462 4462 sharedp->rsmsi_handle = NULL;
4463 4463
4464 4464 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4465 4465 break;
4466 4466 case RSMSI_STATE_CONN_QUIESCE:
4467 4467 break;
4468 4468 case RSMSI_STATE_MAPPED:
4469 4469 /* do the rsmpi unmap and disconnect */
4470 4470 if (sharedp->rsmsi_node != my_nodeid) {
4471 4471 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4472 4472
4473 4473 DBG_PRINTF((category, RSM_DEBUG,
4474 4474 "rsmshare_suspend: rsmpi unmap %d\n", e));
4475 4475
4476 4476 e = adapter->rsmpi_ops->
4477 4477 rsm_disconnect(sharedp->rsmsi_handle);
4478 4478 DBG_PRINTF((category, RSM_DEBUG,
4479 4479 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4480 4480 sharedp->rsmsi_segid, e));
4481 4481 }
4482 4482
4483 4483 sharedp->rsmsi_handle = NULL;
4484 4484
4485 4485 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4486 4486 break;
4487 4487 case RSMSI_STATE_MAP_QUIESCE:
4488 4488 break;
4489 4489 case RSMSI_STATE_DISCONNECTED:
4490 4490 break;
4491 4491 default:
4492 4492 ASSERT(0); /* invalid state */
4493 4493 }
4494 4494
4495 4495 rsmsharelock_release(seg);
4496 4496 rsmseglock_release(seg);
4497 4497
4498 4498 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4499 4499 "rsmsegshare_suspend done\n"));
4500 4500 }
4501 4501
4502 4502 /*
4503 4503 * This should get called on receiving a RESUME message or from
4504 4504 * the pathmanger if the node undergoing DR dies.
4505 4505 */
4506 4506 static void
4507 4507 importer_resume(rsm_node_id_t src_node)
4508 4508 {
4509 4509 int i;
4510 4510 rsmresource_t *p = NULL;
4511 4511 rsmhash_table_t *rhash = &rsm_import_segs;
4512 4512 void *cookie;
4513 4513 DBG_DEFINE(category,
4514 4514 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4515 4515
4516 4516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4517 4517
4518 4518 rw_enter(&rhash->rsmhash_rw, RW_READER);
4519 4519
4520 4520 for (i = 0; i < rsm_hash_size; i++) {
4521 4521 p = rhash->bucket[i];
4522 4522
4523 4523 for (; p; p = p->rsmrc_next) {
4524 4524 rsmseg_t *seg = (rsmseg_t *)p;
4525 4525
4526 4526 rsmseglock_acquire(seg);
4527 4527
4528 4528 /* process only importers of node undergoing DR */
4529 4529 if (seg->s_node != src_node) {
4530 4530 rsmseglock_release(seg);
4531 4531 continue;
4532 4532 }
4533 4533
4534 4534 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4535 4535 rsmipc_request_t request;
4536 4536 /*
4537 4537 * rsmpi map/connect failed
4538 4538 * inform the exporter so that it can
4539 4539 * remove the importer.
4540 4540 */
4541 4541 request.rsmipc_hdr.rsmipc_type =
4542 4542 RSMIPC_MSG_NOTIMPORTING;
4543 4543 request.rsmipc_key = seg->s_segid;
4544 4544 request.rsmipc_segment_cookie = cookie;
4545 4545 rsmseglock_release(seg);
4546 4546 (void) rsmipc_send(seg->s_node, &request,
4547 4547 RSM_NO_REPLY);
4548 4548 } else {
4549 4549 rsmseglock_release(seg);
4550 4550 }
4551 4551 }
4552 4552 }
4553 4553
4554 4554 rw_exit(&rhash->rsmhash_rw);
4555 4555
4556 4556 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4557 4557 }
4558 4558
4559 4559 static int
4560 4560 rsmseg_resume(rsmseg_t *seg, void **cookie)
4561 4561 {
4562 4562 int e;
4563 4563 int retc;
4564 4564 off_t dev_offset;
4565 4565 size_t maplen;
4566 4566 uint_t maxprot;
4567 4567 rsm_mapinfo_t *p;
4568 4568 rsmcookie_t *hdl;
4569 4569 rsm_import_share_t *sharedp;
4570 4570 DBG_DEFINE(category,
4571 4571 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4572 4572
4573 4573 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4574 4574 "rsmseg_resume enter: key=%u\n", seg->s_key));
4575 4575
4576 4576 *cookie = NULL;
4577 4577
4578 4578 ASSERT(rsmseglock_held(seg));
4579 4579
4580 4580 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4581 4581 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4582 4582 return (RSM_SUCCESS);
4583 4583 }
4584 4584
4585 4585 sharedp = seg->s_share;
4586 4586
4587 4587 rsmsharelock_acquire(seg);
4588 4588
4589 4589 /* resume the shared connection and/or mapping */
4590 4590 retc = rsmsegshare_resume(seg);
4591 4591
4592 4592 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4593 4593 /* shared state can either be connected or mapped */
4594 4594 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4595 4595 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4596 4596 ASSERT(retc == RSM_SUCCESS);
4597 4597 seg->s_handle.in = sharedp->rsmsi_handle;
4598 4598 rsmsharelock_release(seg);
4599 4599 seg->s_state = RSM_STATE_CONNECT;
4600 4600
4601 4601 } else { /* error in rsmpi connect during resume */
4602 4602 seg->s_handle.in = NULL;
4603 4603 seg->s_state = RSM_STATE_DISCONNECT;
4604 4604
4605 4605 sharedp->rsmsi_refcnt--;
4606 4606 cookie = (void *)sharedp->rsmsi_cookie;
4607 4607
4608 4608 if (sharedp->rsmsi_refcnt == 0) {
4609 4609 ASSERT(sharedp->rsmsi_mapcnt == 0);
4610 4610 rsmsharelock_release(seg);
4611 4611
4612 4612 /* clean up the shared data structure */
4613 4613 mutex_destroy(&sharedp->rsmsi_lock);
4614 4614 cv_destroy(&sharedp->rsmsi_cv);
4615 4615 kmem_free((void *)(sharedp),
4616 4616 sizeof (rsm_import_share_t));
4617 4617
4618 4618 } else {
4619 4619 rsmsharelock_release(seg);
4620 4620 }
4621 4621 /*
4622 4622 * The following needs to be done after any
4623 4623 * rsmsharelock calls which use seg->s_share.
4624 4624 */
4625 4625 seg->s_share = NULL;
4626 4626 }
4627 4627
4628 4628 /* signal any waiting segment */
4629 4629 cv_broadcast(&seg->s_cv);
4630 4630
4631 4631 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4632 4632 "rsmseg_resume done:state=%d\n", seg->s_state));
4633 4633 return (retc);
4634 4634 }
4635 4635
4636 4636 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4637 4637
4638 4638 /* Setup protections for remap */
4639 4639 maxprot = PROT_USER;
4640 4640 if (seg->s_mode & RSM_PERM_READ) {
4641 4641 maxprot |= PROT_READ;
4642 4642 }
4643 4643 if (seg->s_mode & RSM_PERM_WRITE) {
4644 4644 maxprot |= PROT_WRITE;
4645 4645 }
4646 4646
4647 4647 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4648 4648 /* error in rsmpi connect or map during resume */
4649 4649
4650 4650 /* remap to trash page */
4651 4651 ASSERT(seg->s_ckl != NULL);
4652 4652
4653 4653 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4654 4654 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4655 4655 remap_cookie, hdl->c_off, hdl->c_len,
4656 4656 maxprot, 0, NULL);
4657 4657
4658 4658 DBG_PRINTF((category, RSM_ERR,
4659 4659 "rsmseg_resume:remap=%d\n", e));
4660 4660 }
4661 4661
4662 4662 seg->s_handle.in = NULL;
4663 4663 seg->s_state = RSM_STATE_DISCONNECT;
4664 4664
4665 4665 sharedp->rsmsi_refcnt--;
4666 4666
4667 4667 sharedp->rsmsi_mapcnt--;
4668 4668 seg->s_mapinfo = NULL;
4669 4669
4670 4670 if (sharedp->rsmsi_refcnt == 0) {
4671 4671 ASSERT(sharedp->rsmsi_mapcnt == 0);
4672 4672 rsmsharelock_release(seg);
4673 4673
4674 4674 /* clean up the shared data structure */
4675 4675 mutex_destroy(&sharedp->rsmsi_lock);
4676 4676 cv_destroy(&sharedp->rsmsi_cv);
4677 4677 kmem_free((void *)(sharedp),
4678 4678 sizeof (rsm_import_share_t));
4679 4679
4680 4680 } else {
4681 4681 rsmsharelock_release(seg);
4682 4682 }
4683 4683 /*
4684 4684 * The following needs to be done after any
4685 4685 * rsmsharelock calls which use seg->s_share.
4686 4686 */
4687 4687 seg->s_share = NULL;
4688 4688
4689 4689 /* signal any waiting segment */
4690 4690 cv_broadcast(&seg->s_cv);
4691 4691
4692 4692 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4693 4693 "rsmseg_resume done:seg=%x,err=%d\n",
4694 4694 seg->s_key, retc));
4695 4695 return (retc);
4696 4696
4697 4697 }
4698 4698
4699 4699 seg->s_handle.in = sharedp->rsmsi_handle;
4700 4700
4701 4701 if (seg->s_node == my_nodeid) { /* loopback */
4702 4702 ASSERT(seg->s_mapinfo == NULL);
4703 4703
4704 4704 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4705 4705 e = devmap_umem_remap(hdl->c_dhp,
4706 4706 rsm_dip, seg->s_cookie,
4707 4707 hdl->c_off, hdl->c_len,
4708 4708 maxprot, 0, NULL);
4709 4709
4710 4710 DBG_PRINTF((category, RSM_ERR,
4711 4711 "rsmseg_resume:remap=%d\n", e));
4712 4712 }
4713 4713 } else { /* remote exporter */
4714 4714 /* remap to the new rsmpi maps */
4715 4715 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4716 4716
4717 4717 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4718 4718 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4719 4719 &dev_offset, &maplen);
4720 4720 e = devmap_devmem_remap(hdl->c_dhp,
4721 4721 p->dip, p->dev_register, dev_offset,
4722 4722 maplen, maxprot, 0, NULL);
4723 4723
4724 4724 DBG_PRINTF((category, RSM_ERR,
4725 4725 "rsmseg_resume:remap=%d\n", e));
4726 4726 }
4727 4727 }
4728 4728
4729 4729 rsmsharelock_release(seg);
4730 4730
4731 4731 seg->s_state = RSM_STATE_ACTIVE;
4732 4732 cv_broadcast(&seg->s_cv);
4733 4733
4734 4734 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4735 4735
4736 4736 return (retc);
4737 4737 }
4738 4738
4739 4739 static int
4740 4740 rsmsegshare_resume(rsmseg_t *seg)
4741 4741 {
4742 4742 int e = RSM_SUCCESS;
4743 4743 adapter_t *adapter;
4744 4744 rsm_import_share_t *sharedp;
4745 4745 DBG_DEFINE(category,
4746 4746 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4747 4747
4748 4748 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4749 4749
4750 4750 ASSERT(rsmseglock_held(seg));
4751 4751 ASSERT(rsmsharelock_held(seg));
4752 4752
4753 4753 sharedp = seg->s_share;
4754 4754
4755 4755 /*
4756 4756 * If we are not in a xxxx_QUIESCE state that means shared
4757 4757 * connect/mapping processing has been already been done
4758 4758 * so return success.
4759 4759 */
4760 4760 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4761 4761 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4762 4762 return (RSM_SUCCESS);
4763 4763 }
4764 4764
4765 4765 adapter = seg->s_adapter;
4766 4766
4767 4767 if (sharedp->rsmsi_node != my_nodeid) {
4768 4768 rsm_addr_t hwaddr;
4769 4769 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4770 4770
4771 4771 e = adapter->rsmpi_ops->rsm_connect(
4772 4772 adapter->rsmpi_handle, hwaddr,
4773 4773 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4774 4774
4775 4775 DBG_PRINTF((category, RSM_DEBUG,
4776 4776 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4777 4777 sharedp->rsmsi_segid, e));
4778 4778
4779 4779 if (e != RSM_SUCCESS) {
4780 4780 /* when do we send the NOT_IMPORTING message */
4781 4781 sharedp->rsmsi_handle = NULL;
4782 4782 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4783 4783 /* signal any waiting segment */
4784 4784 cv_broadcast(&sharedp->rsmsi_cv);
4785 4785 return (e);
4786 4786 }
4787 4787 }
4788 4788
4789 4789 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4790 4790 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4791 4791 /* signal any waiting segment */
4792 4792 cv_broadcast(&sharedp->rsmsi_cv);
4793 4793 return (e);
4794 4794 }
4795 4795
4796 4796 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4797 4797
4798 4798 /* do the rsmpi map of the whole segment here */
4799 4799 if (sharedp->rsmsi_node != my_nodeid) {
4800 4800 size_t mapped_len;
4801 4801 rsm_mapinfo_t *p;
4802 4802
4803 4803 /*
4804 4804 * We need to do rsmpi maps with <off, lens> identical to
4805 4805 * the old mapinfo list because the segment mapping handles
4806 4806 * dhp and such need the fragmentation of rsmpi maps to be
4807 4807 * identical to what it was during the mmap of the segment
4808 4808 */
4809 4809 p = sharedp->rsmsi_mapinfo;
4810 4810
4811 4811 while (p != NULL) {
4812 4812 mapped_len = 0;
4813 4813
4814 4814 e = adapter->rsmpi_ops->rsm_map(
4815 4815 sharedp->rsmsi_handle, p->start_offset,
4816 4816 p->individual_len, &mapped_len,
4817 4817 &p->dip, &p->dev_register, &p->dev_offset,
4818 4818 NULL, NULL);
4819 4819
4820 4820 if (e != 0) {
4821 4821 DBG_PRINTF((category, RSM_ERR,
4822 4822 "rsmsegshare_resume: rsmpi map err=%d\n",
4823 4823 e));
4824 4824 break;
4825 4825 }
4826 4826
4827 4827 if (mapped_len != p->individual_len) {
4828 4828 DBG_PRINTF((category, RSM_ERR,
4829 4829 "rsmsegshare_resume: rsmpi maplen"
4830 4830 "< reqlen=%lx\n", mapped_len));
4831 4831 e = RSMERR_BAD_LENGTH;
4832 4832 break;
4833 4833 }
4834 4834
4835 4835 p = p->next;
4836 4836
4837 4837 }
4838 4838
4839 4839
4840 4840 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4841 4841 int err;
4842 4842 /* Check if this is the first rsm_map */
4843 4843 if (p != sharedp->rsmsi_mapinfo) {
4844 4844 /*
4845 4845 * A single rsm_unmap undoes multiple rsm_maps.
4846 4846 */
4847 4847 (void) seg->s_adapter->rsmpi_ops->
4848 4848 rsm_unmap(sharedp->rsmsi_handle);
4849 4849 }
4850 4850
4851 4851 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4852 4852 sharedp->rsmsi_mapinfo = NULL;
4853 4853
4854 4854 err = adapter->rsmpi_ops->
4855 4855 rsm_disconnect(sharedp->rsmsi_handle);
4856 4856
4857 4857 DBG_PRINTF((category, RSM_DEBUG,
4858 4858 "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4859 4859 sharedp->rsmsi_segid, err));
4860 4860
4861 4861 sharedp->rsmsi_handle = NULL;
4862 4862 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4863 4863
4864 4864 /* signal the waiting segments */
4865 4865 cv_broadcast(&sharedp->rsmsi_cv);
4866 4866 DBG_PRINTF((category, RSM_DEBUG,
4867 4867 "rsmsegshare_resume done: rsmpi map err\n"));
4868 4868 return (e);
4869 4869 }
4870 4870 }
4871 4871
4872 4872 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4873 4873
4874 4874 /* signal any waiting segment */
4875 4875 cv_broadcast(&sharedp->rsmsi_cv);
4876 4876
4877 4877 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4878 4878
4879 4879 return (e);
4880 4880 }
4881 4881
4882 4882 /*
4883 4883 * this is the routine that gets called by recv_taskq which is the
4884 4884 * thread that processes messages that are flow-controlled.
4885 4885 */
4886 4886 static void
4887 4887 rsm_intr_proc_deferred(void *arg)
4888 4888 {
4889 4889 path_t *path = (path_t *)arg;
4890 4890 rsmipc_request_t *msg;
4891 4891 rsmipc_msghdr_t *msghdr;
4892 4892 rsm_node_id_t src_node;
4893 4893 msgbuf_elem_t *head;
4894 4894 int e;
4895 4895 DBG_DEFINE(category,
4896 4896 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4897 4897
4898 4898 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4899 4899 "rsm_intr_proc_deferred enter\n"));
4900 4900
4901 4901 mutex_enter(&path->mutex);
4902 4902
4903 4903 /* use the head of the msgbuf_queue */
4904 4904 head = rsmka_gethead_msgbuf(path);
4905 4905
4906 4906 mutex_exit(&path->mutex);
4907 4907
4908 4908 msg = (rsmipc_request_t *)&(head->msg);
4909 4909 msghdr = (rsmipc_msghdr_t *)msg;
4910 4910
4911 4911 src_node = msghdr->rsmipc_src;
4912 4912
4913 4913 /*
4914 4914 * messages that need to send a reply should check the message version
4915 4915 * before processing the message. And all messages that need to
4916 4916 * send a reply should be processed here by the worker thread.
4917 4917 */
4918 4918 switch (msghdr->rsmipc_type) {
4919 4919 case RSMIPC_MSG_SEGCONNECT:
4920 4920 if (msghdr->rsmipc_version != RSM_VERSION) {
4921 4921 rsmipc_reply_t reply;
4922 4922 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4923 4923 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4924 4924 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4925 4925 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4926 4926 } else {
4927 4927 rsm_intr_segconnect(src_node, msg);
4928 4928 }
4929 4929 break;
4930 4930 case RSMIPC_MSG_DISCONNECT:
4931 4931 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4932 4932 break;
4933 4933 case RSMIPC_MSG_SUSPEND:
4934 4934 importer_suspend(src_node);
4935 4935 break;
4936 4936 case RSMIPC_MSG_SUSPEND_DONE:
4937 4937 rsm_suspend_complete(src_node, 0);
4938 4938 break;
4939 4939 case RSMIPC_MSG_RESUME:
4940 4940 importer_resume(src_node);
4941 4941 break;
4942 4942 default:
4943 4943 ASSERT(0);
4944 4944 }
4945 4945
4946 4946 mutex_enter(&path->mutex);
4947 4947
4948 4948 rsmka_dequeue_msgbuf(path);
4949 4949
4950 4950 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4951 4951 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4952 4952 path->procmsg_cnt++;
4953 4953
4954 4954 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4955 4955
4956 4956 /* No need to send credits if path is going down */
4957 4957 if ((path->state == RSMKA_PATH_ACTIVE) &&
4958 4958 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4959 4959 /*
4960 4960 * send credits and reset procmsg_cnt if success otherwise
4961 4961 * credits will be sent after processing the next message
4962 4962 */
4963 4963 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4964 4964 if (e == 0)
4965 4965 path->procmsg_cnt = 0;
4966 4966 else
4967 4967 DBG_PRINTF((category, RSM_ERR,
4968 4968 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4969 4969 }
4970 4970
4971 4971 /*
4972 4972 * decrement the path refcnt since we incremented it in
4973 4973 * rsm_intr_callback_dispatch
4974 4974 */
4975 4975 PATH_RELE_NOLOCK(path);
4976 4976
4977 4977 mutex_exit(&path->mutex);
4978 4978
4979 4979 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4980 4980 "rsm_intr_proc_deferred done\n"));
4981 4981 }
4982 4982
4983 4983 /*
4984 4984 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4985 4985 */
4986 4986 static void
4987 4987 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4988 4988 rsm_intr_hand_arg_t arg)
4989 4989 {
4990 4990 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4991 4991 path_t *path;
4992 4992 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4993 4993 DBG_DEFINE(category,
4994 4994 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4995 4995
4996 4996 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4997 4997 "rsm_intr_callback_dispatch enter\n"));
4998 4998 ASSERT(data && hdlr_argp);
4999 4999
5000 5000 /* look up the path - incr the path refcnt */
5001 5001 path = rsm_find_path(hdlr_argp->adapter_name,
5002 5002 hdlr_argp->adapter_instance, src_hwaddr);
5003 5003
5004 5004 /* the path has been removed - drop this message */
5005 5005 if (path == NULL) {
5006 5006 DBG_PRINTF((category, RSM_DEBUG,
5007 5007 "rsm_intr_callback_dispatch done: msg dropped\n"));
5008 5008 return;
5009 5009 }
5010 5010 /* the path is not active - don't accept new messages */
5011 5011 if (path->state != RSMKA_PATH_ACTIVE) {
5012 5012 PATH_RELE_NOLOCK(path);
5013 5013 mutex_exit(&path->mutex);
5014 5014 DBG_PRINTF((category, RSM_DEBUG,
5015 5015 "rsm_intr_callback_dispatch done: msg dropped"
5016 5016 " path=%lx !ACTIVE\n", path));
5017 5017 return;
5018 5018 }
5019 5019
5020 5020 /*
5021 5021 * Check if this message was sent to an older incarnation
5022 5022 * of the path/sendq.
5023 5023 */
5024 5024 if (path->local_incn != msghdr->rsmipc_incn) {
5025 5025 /* decrement the refcnt */
5026 5026 PATH_RELE_NOLOCK(path);
5027 5027 mutex_exit(&path->mutex);
5028 5028 DBG_PRINTF((category, RSM_DEBUG,
5029 5029 "rsm_intr_callback_dispatch done: old incn %lld\n",
5030 5030 msghdr->rsmipc_incn));
5031 5031 return;
5032 5032 }
5033 5033
5034 5034 /* copy and enqueue msg on the path's msgbuf queue */
5035 5035 rsmka_enqueue_msgbuf(path, data);
5036 5036
5037 5037 /*
5038 5038 * schedule task to process messages - ignore retval from
5039 5039 * task_dispatch because we sender cannot send more than
5040 5040 * what receiver can handle.
5041 5041 */
5042 5042 (void) taskq_dispatch(path->recv_taskq,
5043 5043 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5044 5044
5045 5045 mutex_exit(&path->mutex);
5046 5046
5047 5047 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5048 5048 "rsm_intr_callback_dispatch done\n"));
5049 5049 }
5050 5050
5051 5051 /*
5052 5052 * This procedure is called from rsm_srv_func when a remote node creates a
5053 5053 * a send queue. This event is used as a hint that an earlier failed
5054 5054 * attempt to create a send queue to that remote node may now succeed and
5055 5055 * should be retried. Indication of an earlier failed attempt is provided
5056 5056 * by the RSMKA_SQCREATE_PENDING flag.
5057 5057 */
5058 5058 static void
5059 5059 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5060 5060 {
5061 5061 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5062 5062 path_t *path;
5063 5063 DBG_DEFINE(category,
5064 5064 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5065 5065
5066 5066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5067 5067 "rsm_sqcreateop_callback enter\n"));
5068 5068
5069 5069 /* look up the path - incr the path refcnt */
5070 5070 path = rsm_find_path(hdlr_argp->adapter_name,
5071 5071 hdlr_argp->adapter_instance, src_hwaddr);
5072 5072
5073 5073 if (path == NULL) {
5074 5074 DBG_PRINTF((category, RSM_DEBUG,
5075 5075 "rsm_sqcreateop_callback done: no path\n"));
5076 5076 return;
5077 5077 }
5078 5078
5079 5079 if ((path->state == RSMKA_PATH_UP) &&
5080 5080 (path->flags & RSMKA_SQCREATE_PENDING)) {
5081 5081 /*
5082 5082 * previous attempt to create sendq had failed, retry
5083 5083 * it and move to RSMKA_PATH_ACTIVE state if successful.
5084 5084 * the refcnt will be decremented in the do_deferred_work
5085 5085 */
5086 5086 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5087 5087 } else {
5088 5088 /* decrement the refcnt */
5089 5089 PATH_RELE_NOLOCK(path);
5090 5090 }
5091 5091 mutex_exit(&path->mutex);
5092 5092
5093 5093 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5094 5094 "rsm_sqcreateop_callback done\n"));
5095 5095 }
5096 5096
5097 5097 static void
5098 5098 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5099 5099 {
5100 5100 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5101 5101 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5102 5102 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5103 5103 rsm_node_id_t src_node;
5104 5104 DBG_DEFINE(category,
5105 5105 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5106 5106
5107 5107 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5108 5108 "src=%d, type=%d\n", msghdr->rsmipc_src,
5109 5109 msghdr->rsmipc_type));
5110 5110
5111 5111 /*
5112 5112 * Check for the version number in the msg header. If it is not
5113 5113 * RSM_VERSION, drop the message. In the future, we need to manage
5114 5114 * incompatible version numbers in some way
5115 5115 */
5116 5116 if (msghdr->rsmipc_version != RSM_VERSION) {
5117 5117 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5118 5118 /*
5119 5119 * Drop requests that don't have a reply right here
5120 5120 * Request with reply will send a BAD_VERSION reply
5121 5121 * when they get processed by the worker thread.
5122 5122 */
5123 5123 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5124 5124 return;
5125 5125 }
5126 5126
5127 5127 }
5128 5128
5129 5129 src_node = msghdr->rsmipc_src;
5130 5130
5131 5131 switch (msghdr->rsmipc_type) {
5132 5132 case RSMIPC_MSG_SEGCONNECT:
5133 5133 case RSMIPC_MSG_DISCONNECT:
5134 5134 case RSMIPC_MSG_SUSPEND:
5135 5135 case RSMIPC_MSG_SUSPEND_DONE:
5136 5136 case RSMIPC_MSG_RESUME:
5137 5137 /*
5138 5138 * These message types are handled by a worker thread using
5139 5139 * the flow-control algorithm.
5140 5140 * Any message processing that does one or more of the
5141 5141 * following should be handled in a worker thread.
5142 5142 * - allocates resources and might sleep
5143 5143 * - makes RSMPI calls down to the interconnect driver
5144 5144 * this by defn include requests with reply.
5145 5145 * - takes a long duration of time
5146 5146 */
5147 5147 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5148 5148 break;
5149 5149 case RSMIPC_MSG_NOTIMPORTING:
5150 5150 importer_list_rm(src_node, msg->rsmipc_key,
5151 5151 msg->rsmipc_segment_cookie);
5152 5152 break;
5153 5153 case RSMIPC_MSG_SQREADY:
5154 5154 rsm_proc_sqready(data, src_hwaddr, arg);
5155 5155 break;
5156 5156 case RSMIPC_MSG_SQREADY_ACK:
5157 5157 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5158 5158 break;
5159 5159 case RSMIPC_MSG_CREDIT:
5160 5160 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5161 5161 break;
5162 5162 case RSMIPC_MSG_REPLY:
5163 5163 rsm_intr_reply(msghdr);
5164 5164 break;
5165 5165 case RSMIPC_MSG_BELL:
5166 5166 rsm_intr_event(msg);
5167 5167 break;
5168 5168 case RSMIPC_MSG_IMPORTING:
5169 5169 importer_list_add(src_node, msg->rsmipc_key,
5170 5170 msg->rsmipc_adapter_hwaddr,
5171 5171 msg->rsmipc_segment_cookie);
5172 5172 break;
5173 5173 case RSMIPC_MSG_REPUBLISH:
5174 5174 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5175 5175 break;
5176 5176 default:
5177 5177 DBG_PRINTF((category, RSM_DEBUG,
5178 5178 "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5179 5179 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5180 5180 }
5181 5181
5182 5182 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5183 5183
5184 5184 }
5185 5185
5186 5186 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5187 5187 rsm_intr_q_op_t opcode, rsm_addr_t src,
5188 5188 void *data, size_t size, rsm_intr_hand_arg_t arg)
5189 5189 {
5190 5190 DBG_DEFINE(category,
5191 5191 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5192 5192
5193 5193 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5194 5194
5195 5195 switch (opcode) {
5196 5196 case RSM_INTR_Q_OP_CREATE:
5197 5197 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5198 5198 rsm_sqcreateop_callback(src, arg);
5199 5199 break;
5200 5200 case RSM_INTR_Q_OP_DESTROY:
5201 5201 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5202 5202 break;
5203 5203 case RSM_INTR_Q_OP_RECEIVE:
5204 5204 rsm_intr_callback(data, src, arg);
5205 5205 break;
5206 5206 default:
5207 5207 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5208 5208 "rsm_srv_func: unknown opcode = %x\n", opcode));
5209 5209 }
5210 5210
5211 5211 chd = chd;
5212 5212 size = size;
5213 5213
5214 5214 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5215 5215
5216 5216 return (RSM_INTR_HAND_CLAIMED);
5217 5217 }
5218 5218
5219 5219 /* *************************** IPC slots ************************* */
5220 5220 static rsmipc_slot_t *
5221 5221 rsmipc_alloc()
5222 5222 {
5223 5223 int i;
5224 5224 rsmipc_slot_t *slot;
5225 5225 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5226 5226
5227 5227 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5228 5228
5229 5229 /* try to find a free slot, if not wait */
5230 5230 mutex_enter(&rsm_ipc.lock);
5231 5231
5232 5232 while (rsm_ipc.count == 0) {
5233 5233 rsm_ipc.wanted = 1;
5234 5234 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5235 5235 }
5236 5236
5237 5237 /* An empty slot is available, find it */
5238 5238 slot = &rsm_ipc.slots[0];
5239 5239 for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5240 5240 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5241 5241 RSMIPC_CLEAR(slot, RSMIPC_FREE);
5242 5242 break;
5243 5243 }
5244 5244 }
5245 5245
5246 5246 ASSERT(i < RSMIPC_SZ);
5247 5247 rsm_ipc.count--; /* one less is available */
5248 5248 rsm_ipc.sequence++; /* new sequence */
5249 5249
5250 5250 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5251 5251 slot->rsmipc_cookie.ic.index = (uint_t)i;
5252 5252
5253 5253 mutex_exit(&rsm_ipc.lock);
5254 5254
5255 5255 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5256 5256
5257 5257 return (slot);
5258 5258 }
5259 5259
5260 5260 static void
5261 5261 rsmipc_free(rsmipc_slot_t *slot)
5262 5262 {
5263 5263 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5264 5264
5265 5265 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5266 5266
5267 5267 ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5268 5268 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5269 5269
5270 5270 mutex_enter(&rsm_ipc.lock);
5271 5271
5272 5272 RSMIPC_SET(slot, RSMIPC_FREE);
5273 5273
5274 5274 slot->rsmipc_cookie.ic.sequence = 0;
5275 5275
5276 5276 mutex_exit(&slot->rsmipc_lock);
5277 5277 rsm_ipc.count++;
5278 5278 ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5279 5279 if (rsm_ipc.wanted) {
5280 5280 rsm_ipc.wanted = 0;
5281 5281 cv_broadcast(&rsm_ipc.cv);
5282 5282 }
5283 5283
5284 5284 mutex_exit(&rsm_ipc.lock);
5285 5285
5286 5286 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5287 5287 }
5288 5288
5289 5289 static int
5290 5290 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5291 5291 {
5292 5292 int e = 0;
5293 5293 int credit_check = 0;
5294 5294 int retry_cnt = 0;
5295 5295 int min_retry_cnt = 10;
5296 5296 rsm_send_t is;
5297 5297 rsmipc_slot_t *rslot;
5298 5298 adapter_t *adapter;
5299 5299 path_t *path;
5300 5300 sendq_token_t *sendq_token;
5301 5301 sendq_token_t *used_sendq_token = NULL;
5302 5302 rsm_send_q_handle_t ipc_handle;
5303 5303 DBG_DEFINE(category,
5304 5304 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5305 5305
5306 5306 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5307 5307 dest));
5308 5308
5309 5309 /*
5310 5310 * Check if this is a local case
5311 5311 */
5312 5312 if (dest == my_nodeid) {
5313 5313 switch (req->rsmipc_hdr.rsmipc_type) {
5314 5314 case RSMIPC_MSG_SEGCONNECT:
5315 5315 reply->rsmipc_status = (short)rsmsegacl_validate(
5316 5316 req, dest, reply);
5317 5317 break;
5318 5318 case RSMIPC_MSG_BELL:
5319 5319 req->rsmipc_hdr.rsmipc_src = dest;
5320 5320 rsm_intr_event(req);
5321 5321 break;
5322 5322 case RSMIPC_MSG_IMPORTING:
5323 5323 importer_list_add(dest, req->rsmipc_key,
5324 5324 req->rsmipc_adapter_hwaddr,
5325 5325 req->rsmipc_segment_cookie);
5326 5326 break;
5327 5327 case RSMIPC_MSG_NOTIMPORTING:
5328 5328 importer_list_rm(dest, req->rsmipc_key,
5329 5329 req->rsmipc_segment_cookie);
5330 5330 break;
5331 5331 case RSMIPC_MSG_REPUBLISH:
5332 5332 importer_update(dest, req->rsmipc_key,
5333 5333 req->rsmipc_perm);
5334 5334 break;
5335 5335 case RSMIPC_MSG_SUSPEND:
5336 5336 importer_suspend(dest);
5337 5337 break;
5338 5338 case RSMIPC_MSG_SUSPEND_DONE:
5339 5339 rsm_suspend_complete(dest, 0);
5340 5340 break;
5341 5341 case RSMIPC_MSG_RESUME:
5342 5342 importer_resume(dest);
5343 5343 break;
5344 5344 default:
5345 5345 ASSERT(0);
5346 5346 }
5347 5347 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5348 5348 "rsmipc_send done\n"));
5349 5349 return (0);
5350 5350 }
5351 5351
5352 5352 if (dest >= MAX_NODES) {
5353 5353 DBG_PRINTF((category, RSM_ERR,
5354 5354 "rsm: rsmipc_send bad node number %x\n", dest));
5355 5355 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5356 5356 }
5357 5357
5358 5358 /*
5359 5359 * Oh boy! we are going remote.
5360 5360 */
5361 5361
5362 5362 /*
5363 5363 * identify if we need to have credits to send this message
5364 5364 * - only selected requests are flow controlled
5365 5365 */
5366 5366 if (req != NULL) {
5367 5367 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5368 5368 "rsmipc_send:request type=%d\n",
5369 5369 req->rsmipc_hdr.rsmipc_type));
5370 5370
5371 5371 switch (req->rsmipc_hdr.rsmipc_type) {
5372 5372 case RSMIPC_MSG_SEGCONNECT:
5373 5373 case RSMIPC_MSG_DISCONNECT:
5374 5374 case RSMIPC_MSG_IMPORTING:
5375 5375 case RSMIPC_MSG_SUSPEND:
5376 5376 case RSMIPC_MSG_SUSPEND_DONE:
5377 5377 case RSMIPC_MSG_RESUME:
5378 5378 credit_check = 1;
5379 5379 break;
5380 5380 default:
5381 5381 credit_check = 0;
5382 5382 }
5383 5383 }
5384 5384
5385 5385 again:
5386 5386 if (retry_cnt++ == min_retry_cnt) {
5387 5387 /* backoff before further retries for 10ms */
5388 5388 delay(drv_usectohz(10000));
5389 5389 retry_cnt = 0; /* reset retry_cnt */
5390 5390 }
5391 5391 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5392 5392 if (sendq_token == NULL) {
5393 5393 DBG_PRINTF((category, RSM_ERR,
5394 5394 "rsm: rsmipc_send no device to reach node %d\n", dest));
5395 5395 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5396 5396 }
5397 5397
5398 5398 if ((sendq_token == used_sendq_token) &&
5399 5399 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5400 5400 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5401 5401 rele_sendq_token(sendq_token);
5402 5402 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5403 5403 return (RSMERR_CONN_ABORTED);
5404 5404 } else
5405 5405 used_sendq_token = sendq_token;
5406 5406
5407 5407 /* lint -save -e413 */
5408 5408 path = SQ_TOKEN_TO_PATH(sendq_token);
5409 5409 adapter = path->local_adapter;
5410 5410 /* lint -restore */
5411 5411 ipc_handle = sendq_token->rsmpi_sendq_handle;
5412 5412
5413 5413 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5414 5414 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5415 5415
5416 5416 if (reply == NULL) {
5417 5417 /* Send request without ack */
5418 5418 /*
5419 5419 * Set the rsmipc_version number in the msghdr for KA
5420 5420 * communication versioning
5421 5421 */
5422 5422 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5423 5423 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5424 5424 /*
5425 5425 * remote endpoints incn should match the value in our
5426 5426 * path's remote_incn field. No need to grab any lock
5427 5427 * since we have refcnted the path in rsmka_get_sendq_token
5428 5428 */
5429 5429 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5430 5430
5431 5431 is.is_data = (void *)req;
5432 5432 is.is_size = sizeof (*req);
5433 5433 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5434 5434 is.is_wait = 0;
5435 5435
5436 5436 if (credit_check) {
5437 5437 mutex_enter(&path->mutex);
5438 5438 /*
5439 5439 * wait till we recv credits or path goes down. If path
5440 5440 * goes down rsm_send will fail and we handle the error
5441 5441 * then
5442 5442 */
5443 5443 while ((sendq_token->msgbuf_avail == 0) &&
5444 5444 (path->state == RSMKA_PATH_ACTIVE)) {
5445 5445 e = cv_wait_sig(&sendq_token->sendq_cv,
5446 5446 &path->mutex);
5447 5447 if (e == 0) {
5448 5448 mutex_exit(&path->mutex);
5449 5449 no_reply_cnt++;
5450 5450 rele_sendq_token(sendq_token);
5451 5451 DBG_PRINTF((category, RSM_DEBUG,
5452 5452 "rsmipc_send done: "
5453 5453 "cv_wait INTERRUPTED"));
5454 5454 return (RSMERR_INTERRUPTED);
5455 5455 }
5456 5456 }
5457 5457
5458 5458 /*
5459 5459 * path is not active retry on another path.
5460 5460 */
5461 5461 if (path->state != RSMKA_PATH_ACTIVE) {
5462 5462 mutex_exit(&path->mutex);
5463 5463 rele_sendq_token(sendq_token);
5464 5464 e = RSMERR_CONN_ABORTED;
5465 5465 DBG_PRINTF((category, RSM_ERR,
5466 5466 "rsm: rsmipc_send: path !ACTIVE"));
5467 5467 goto again;
5468 5468 }
5469 5469
5470 5470 ASSERT(sendq_token->msgbuf_avail > 0);
5471 5471
5472 5472 /*
5473 5473 * reserve a msgbuf
5474 5474 */
5475 5475 sendq_token->msgbuf_avail--;
5476 5476
5477 5477 mutex_exit(&path->mutex);
5478 5478
5479 5479 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5480 5480 NULL);
5481 5481
5482 5482 if (e != RSM_SUCCESS) {
5483 5483 mutex_enter(&path->mutex);
5484 5484 /*
5485 5485 * release the reserved msgbuf since
5486 5486 * the send failed
5487 5487 */
5488 5488 sendq_token->msgbuf_avail++;
5489 5489 cv_broadcast(&sendq_token->sendq_cv);
5490 5490 mutex_exit(&path->mutex);
5491 5491 }
5492 5492 } else
5493 5493 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5494 5494 NULL);
↓ open down ↓ |
1417 lines elided |
↑ open up ↑ |
5495 5495
5496 5496 no_reply_cnt++;
5497 5497 rele_sendq_token(sendq_token);
5498 5498 if (e != RSM_SUCCESS) {
5499 5499 DBG_PRINTF((category, RSM_ERR,
5500 5500 "rsm: rsmipc_send no reply send"
5501 5501 " err = %d no reply count = %d\n",
5502 5502 e, no_reply_cnt));
5503 5503 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5504 5504 e != RSMERR_BAD_BARRIER_HNDL);
5505 - atomic_add_64(&rsm_ipcsend_errcnt, 1);
5505 + atomic_inc_64(&rsm_ipcsend_errcnt);
5506 5506 goto again;
5507 5507 } else {
5508 5508 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5509 5509 "rsmipc_send done\n"));
5510 5510 return (e);
5511 5511 }
5512 5512
5513 5513 }
5514 5514
5515 5515 if (req == NULL) {
5516 5516 /* Send reply - No flow control is done for reply */
5517 5517 /*
5518 5518 * Set the version in the msg header for KA communication
5519 5519 * versioning
5520 5520 */
5521 5521 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5522 5522 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5523 5523 /* incn number is not used for reply msgs currently */
5524 5524 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5525 5525
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
5526 5526 is.is_data = (void *)reply;
5527 5527 is.is_size = sizeof (*reply);
5528 5528 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5529 5529 is.is_wait = 0;
5530 5530 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5531 5531 rele_sendq_token(sendq_token);
5532 5532 if (e != RSM_SUCCESS) {
5533 5533 DBG_PRINTF((category, RSM_ERR,
5534 5534 "rsm: rsmipc_send reply send"
5535 5535 " err = %d\n", e));
5536 - atomic_add_64(&rsm_ipcsend_errcnt, 1);
5536 + atomic_inc_64(&rsm_ipcsend_errcnt);
5537 5537 goto again;
5538 5538 } else {
5539 5539 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5540 5540 "rsmipc_send done\n"));
5541 5541 return (e);
5542 5542 }
5543 5543 }
5544 5544
5545 5545 /* Reply needed */
5546 5546 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5547 5547
5548 5548 mutex_enter(&rslot->rsmipc_lock);
5549 5549
5550 5550 rslot->rsmipc_data = (void *)reply;
5551 5551 RSMIPC_SET(rslot, RSMIPC_PENDING);
5552 5552
5553 5553 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5554 5554 /*
5555 5555 * Set the rsmipc_version number in the msghdr for KA
5556 5556 * communication versioning
5557 5557 */
5558 5558 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5559 5559 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5560 5560 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5561 5561 /*
5562 5562 * remote endpoints incn should match the value in our
5563 5563 * path's remote_incn field. No need to grab any lock
5564 5564 * since we have refcnted the path in rsmka_get_sendq_token
5565 5565 */
5566 5566 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5567 5567
5568 5568 is.is_data = (void *)req;
5569 5569 is.is_size = sizeof (*req);
5570 5570 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5571 5571 is.is_wait = 0;
5572 5572 if (credit_check) {
5573 5573
5574 5574 mutex_enter(&path->mutex);
5575 5575 /*
5576 5576 * wait till we recv credits or path goes down. If path
5577 5577 * goes down rsm_send will fail and we handle the error
5578 5578 * then.
5579 5579 */
5580 5580 while ((sendq_token->msgbuf_avail == 0) &&
5581 5581 (path->state == RSMKA_PATH_ACTIVE)) {
5582 5582 e = cv_wait_sig(&sendq_token->sendq_cv,
5583 5583 &path->mutex);
5584 5584 if (e == 0) {
5585 5585 mutex_exit(&path->mutex);
5586 5586 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5587 5587 rsmipc_free(rslot);
5588 5588 rele_sendq_token(sendq_token);
5589 5589 DBG_PRINTF((category, RSM_DEBUG,
5590 5590 "rsmipc_send done: "
5591 5591 "cv_wait INTERRUPTED"));
5592 5592 return (RSMERR_INTERRUPTED);
5593 5593 }
5594 5594 }
5595 5595
5596 5596 /*
5597 5597 * path is not active retry on another path.
5598 5598 */
5599 5599 if (path->state != RSMKA_PATH_ACTIVE) {
5600 5600 mutex_exit(&path->mutex);
5601 5601 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5602 5602 rsmipc_free(rslot);
5603 5603 rele_sendq_token(sendq_token);
5604 5604 e = RSMERR_CONN_ABORTED;
5605 5605 DBG_PRINTF((category, RSM_ERR,
5606 5606 "rsm: rsmipc_send: path !ACTIVE"));
5607 5607 goto again;
5608 5608 }
5609 5609
5610 5610 ASSERT(sendq_token->msgbuf_avail > 0);
5611 5611
5612 5612 /*
5613 5613 * reserve a msgbuf
5614 5614 */
5615 5615 sendq_token->msgbuf_avail--;
5616 5616
5617 5617 mutex_exit(&path->mutex);
5618 5618
5619 5619 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5620 5620 NULL);
5621 5621
5622 5622 if (e != RSM_SUCCESS) {
5623 5623 mutex_enter(&path->mutex);
5624 5624 /*
5625 5625 * release the reserved msgbuf since
5626 5626 * the send failed
5627 5627 */
5628 5628 sendq_token->msgbuf_avail++;
5629 5629 cv_broadcast(&sendq_token->sendq_cv);
5630 5630 mutex_exit(&path->mutex);
5631 5631 }
↓ open down ↓ |
85 lines elided |
↑ open up ↑ |
5632 5632 } else
5633 5633 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5634 5634 NULL);
5635 5635
5636 5636 if (e != RSM_SUCCESS) {
5637 5637 DBG_PRINTF((category, RSM_ERR,
5638 5638 "rsm: rsmipc_send rsmpi send err = %d\n", e));
5639 5639 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5640 5640 rsmipc_free(rslot);
5641 5641 rele_sendq_token(sendq_token);
5642 - atomic_add_64(&rsm_ipcsend_errcnt, 1);
5642 + atomic_inc_64(&rsm_ipcsend_errcnt);
5643 5643 goto again;
5644 5644 }
5645 5645
5646 5646 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5647 5647 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5648 5648 drv_usectohz(5000000), TR_CLOCK_TICK);
5649 5649 if (e < 0) {
5650 5650 /* timed out - retry */
5651 5651 e = RSMERR_TIMEOUT;
5652 5652 } else if (e == 0) {
5653 5653 /* signalled - return error */
5654 5654 e = RSMERR_INTERRUPTED;
5655 5655 break;
5656 5656 } else {
5657 5657 e = RSM_SUCCESS;
5658 5658 }
5659 5659 }
5660 5660
5661 5661 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5662 5662 rsmipc_free(rslot);
5663 5663 rele_sendq_token(sendq_token);
5664 5664
5665 5665 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5666 5666 return (e);
5667 5667 }
5668 5668
5669 5669 static int
5670 5670 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
5671 5671 {
5672 5672 rsmipc_request_t request;
5673 5673
5674 5674 /*
5675 5675 * inform the exporter to delete this importer
5676 5676 */
5677 5677 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5678 5678 request.rsmipc_key = segid;
5679 5679 request.rsmipc_segment_cookie = cookie;
5680 5680 return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5681 5681 }
5682 5682
5683 5683 static void
5684 5684 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5685 5685 int acl_len, rsm_permission_t default_permission)
5686 5686 {
5687 5687 int i;
5688 5688 importing_token_t *token;
5689 5689 rsmipc_request_t request;
5690 5690 republish_token_t *republish_list = NULL;
5691 5691 republish_token_t *rp;
5692 5692 rsm_permission_t permission;
5693 5693 int index;
5694 5694
5695 5695 /*
5696 5696 * send the new access mode to all the nodes that have imported
5697 5697 * this segment.
5698 5698 * If the new acl does not have a node that was present in
5699 5699 * the old acl a access permission of 0 is sent.
5700 5700 */
5701 5701
5702 5702 index = rsmhash(segid);
5703 5703
5704 5704 /*
5705 5705 * create a list of node/permissions to send the republish message
5706 5706 */
5707 5707 mutex_enter(&importer_list.lock);
5708 5708
5709 5709 token = importer_list.bucket[index];
5710 5710 while (token != NULL) {
5711 5711 if (segid == token->key) {
5712 5712 permission = default_permission;
5713 5713
5714 5714 for (i = 0; i < acl_len; i++) {
5715 5715 if (token->importing_node == acl[i].ae_node) {
5716 5716 permission = acl[i].ae_permission;
5717 5717 break;
5718 5718 }
5719 5719 }
5720 5720 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5721 5721
5722 5722 rp->key = segid;
5723 5723 rp->importing_node = token->importing_node;
5724 5724 rp->permission = permission;
5725 5725 rp->next = republish_list;
5726 5726 republish_list = rp;
5727 5727 }
5728 5728 token = token->next;
5729 5729 }
5730 5730
5731 5731 mutex_exit(&importer_list.lock);
5732 5732
5733 5733 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5734 5734 request.rsmipc_key = segid;
5735 5735
5736 5736 while (republish_list != NULL) {
5737 5737 request.rsmipc_perm = republish_list->permission;
5738 5738 (void) rsmipc_send(republish_list->importing_node,
5739 5739 &request, RSM_NO_REPLY);
5740 5740 rp = republish_list;
5741 5741 republish_list = republish_list->next;
5742 5742 kmem_free(rp, sizeof (republish_token_t));
5743 5743 }
5744 5744 }
5745 5745
5746 5746 static void
5747 5747 rsm_send_suspend()
5748 5748 {
5749 5749 int i, e;
5750 5750 rsmipc_request_t request;
5751 5751 list_element_t *tokp;
5752 5752 list_element_t *head = NULL;
5753 5753 importing_token_t *token;
5754 5754 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5755 5755 "rsm_send_suspend enter\n"));
5756 5756
5757 5757 /*
5758 5758 * create a list of node to send the suspend message
5759 5759 *
5760 5760 * Currently the whole importer list is scanned and we obtain
5761 5761 * all the nodes - this basically gets all nodes that at least
5762 5762 * import one segment from the local node.
5763 5763 *
5764 5764 * no need to grab the rsm_suspend_list lock here since we are
5765 5765 * single threaded when suspend is called.
5766 5766 */
5767 5767
5768 5768 mutex_enter(&importer_list.lock);
5769 5769 for (i = 0; i < rsm_hash_size; i++) {
5770 5770
5771 5771 token = importer_list.bucket[i];
5772 5772
5773 5773 while (token != NULL) {
5774 5774
5775 5775 tokp = head;
5776 5776
5777 5777 /*
5778 5778 * make sure that the token's node
5779 5779 * is not already on the suspend list
5780 5780 */
5781 5781 while (tokp != NULL) {
5782 5782 if (tokp->nodeid == token->importing_node) {
5783 5783 break;
5784 5784 }
5785 5785 tokp = tokp->next;
5786 5786 }
5787 5787
5788 5788 if (tokp == NULL) { /* not in suspend list */
5789 5789 tokp = kmem_zalloc(sizeof (list_element_t),
5790 5790 KM_SLEEP);
5791 5791 tokp->nodeid = token->importing_node;
5792 5792 tokp->next = head;
5793 5793 head = tokp;
5794 5794 }
5795 5795
5796 5796 token = token->next;
5797 5797 }
5798 5798 }
5799 5799 mutex_exit(&importer_list.lock);
5800 5800
5801 5801 if (head == NULL) { /* no importers so go ahead and quiesce segments */
5802 5802 exporter_quiesce();
5803 5803 return;
5804 5804 }
5805 5805
5806 5806 mutex_enter(&rsm_suspend_list.list_lock);
5807 5807 ASSERT(rsm_suspend_list.list_head == NULL);
5808 5808 /*
5809 5809 * update the suspend list righaway so that if a node dies the
5810 5810 * pathmanager can set the NODE dead flag
5811 5811 */
5812 5812 rsm_suspend_list.list_head = head;
5813 5813 mutex_exit(&rsm_suspend_list.list_lock);
5814 5814
5815 5815 tokp = head;
5816 5816
5817 5817 while (tokp != NULL) {
5818 5818 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5819 5819 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5820 5820 /*
5821 5821 * Error in rsmipc_send currently happens due to inaccessibility
5822 5822 * of the remote node.
5823 5823 */
5824 5824 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5825 5825 tokp->flags |= RSM_SUSPEND_ACKPENDING;
5826 5826 }
5827 5827
5828 5828 tokp = tokp->next;
5829 5829 }
5830 5830
5831 5831 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5832 5832 "rsm_send_suspend done\n"));
5833 5833
5834 5834 }
5835 5835
5836 5836 static void
5837 5837 rsm_send_resume()
5838 5838 {
5839 5839 rsmipc_request_t request;
5840 5840 list_element_t *elem, *head;
5841 5841
5842 5842 /*
5843 5843 * save the suspend list so that we know where to send
5844 5844 * the resume messages and make the suspend list head
5845 5845 * NULL.
5846 5846 */
5847 5847 mutex_enter(&rsm_suspend_list.list_lock);
5848 5848 head = rsm_suspend_list.list_head;
5849 5849 rsm_suspend_list.list_head = NULL;
5850 5850 mutex_exit(&rsm_suspend_list.list_lock);
5851 5851
5852 5852 while (head != NULL) {
5853 5853 elem = head;
5854 5854 head = head->next;
5855 5855
5856 5856 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5857 5857
5858 5858 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5859 5859
5860 5860 kmem_free((void *)elem, sizeof (list_element_t));
5861 5861
5862 5862 }
5863 5863
5864 5864 }
5865 5865
5866 5866 /*
5867 5867 * This function takes path and sends a message using the sendq
5868 5868 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5869 5869 * and RSMIPC_MSG_CREDIT are sent using this function.
5870 5870 */
5871 5871 int
5872 5872 rsmipc_send_controlmsg(path_t *path, int msgtype)
5873 5873 {
5874 5874 int e;
5875 5875 int retry_cnt = 0;
5876 5876 int min_retry_cnt = 10;
5877 5877 adapter_t *adapter;
5878 5878 rsm_send_t is;
5879 5879 rsm_send_q_handle_t ipc_handle;
5880 5880 rsmipc_controlmsg_t msg;
5881 5881 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5882 5882
5883 5883 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5884 5884 "rsmipc_send_controlmsg enter\n"));
5885 5885
5886 5886 ASSERT(MUTEX_HELD(&path->mutex));
5887 5887
5888 5888 adapter = path->local_adapter;
5889 5889
5890 5890 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5891 5891 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5892 5892 my_nodeid, adapter->hwaddr, path->remote_node,
5893 5893 path->remote_hwaddr, path->procmsg_cnt));
5894 5894
5895 5895 if (path->state != RSMKA_PATH_ACTIVE) {
5896 5896 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5897 5897 "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5898 5898 return (1);
5899 5899 }
5900 5900
5901 5901 ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5902 5902
5903 5903 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5904 5904 msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5905 5905 msg.rsmipc_hdr.rsmipc_type = msgtype;
5906 5906 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5907 5907
5908 5908 if (msgtype == RSMIPC_MSG_CREDIT)
5909 5909 msg.rsmipc_credits = path->procmsg_cnt;
5910 5910
5911 5911 msg.rsmipc_local_incn = path->local_incn;
5912 5912
5913 5913 msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5914 5914 /* incr the sendq, path refcnt */
5915 5915 PATH_HOLD_NOLOCK(path);
5916 5916 SENDQ_TOKEN_HOLD(path);
5917 5917
5918 5918 do {
5919 5919 /* drop the path lock before doing the rsm_send */
5920 5920 mutex_exit(&path->mutex);
5921 5921
5922 5922 is.is_data = (void *)&msg;
5923 5923 is.is_size = sizeof (msg);
5924 5924 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5925 5925 is.is_wait = 0;
5926 5926
5927 5927 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
↓ open down ↓ |
275 lines elided |
↑ open up ↑ |
5928 5928
5929 5929 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5930 5930 e != RSMERR_BAD_BARRIER_HNDL);
5931 5931
5932 5932 mutex_enter(&path->mutex);
5933 5933
5934 5934 if (e == RSM_SUCCESS) {
5935 5935 break;
5936 5936 }
5937 5937 /* error counter for statistics */
5938 - atomic_add_64(&rsm_ctrlmsg_errcnt, 1);
5938 + atomic_inc_64(&rsm_ctrlmsg_errcnt);
5939 5939
5940 5940 DBG_PRINTF((category, RSM_ERR,
5941 5941 "rsmipc_send_controlmsg:rsm_send error=%d", e));
5942 5942
5943 5943 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5944 5944 (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5945 5945 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5946 5946 retry_cnt = 0;
5947 5947 }
5948 5948 } while (path->state == RSMKA_PATH_ACTIVE);
5949 5949
5950 5950 /* decrement the sendq,path refcnt that we incr before rsm_send */
5951 5951 SENDQ_TOKEN_RELE(path);
5952 5952 PATH_RELE_NOLOCK(path);
5953 5953
5954 5954 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5955 5955 "rsmipc_send_controlmsg done=%d", e));
5956 5956 return (e);
5957 5957 }
5958 5958
5959 5959 /*
5960 5960 * Called from rsm_force_unload and path_importer_disconnect. The memory
5961 5961 * mapping for the imported segment is removed and the segment is
5962 5962 * disconnected at the interconnect layer if disconnect_flag is TRUE.
5963 5963 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5964 5964 * and FALSE from rsm_rebind.
5965 5965 *
5966 5966 * When subsequent accesses cause page faulting, the dummy page is mapped
5967 5967 * to resolve the fault, and the mapping generation number is incremented
5968 5968 * so that the application can be notified on a close barrier operation.
5969 5969 *
5970 5970 * It is important to note that the caller of rsmseg_unload is responsible for
5971 5971 * acquiring the segment lock before making a call to rsmseg_unload. This is
5972 5972 * required to make the caller and rsmseg_unload thread safe. The segment lock
5973 5973 * will be released by the rsmseg_unload function.
5974 5974 */
5975 5975 void
5976 5976 rsmseg_unload(rsmseg_t *im_seg)
5977 5977 {
5978 5978 rsmcookie_t *hdl;
5979 5979 void *shared_cookie;
5980 5980 rsmipc_request_t request;
5981 5981 uint_t maxprot;
5982 5982
5983 5983 DBG_DEFINE(category,
5984 5984 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5985 5985
5986 5986 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5987 5987
5988 5988 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5989 5989
5990 5990 /* wait until segment leaves the mapping state */
5991 5991 while (im_seg->s_state == RSM_STATE_MAPPING)
5992 5992 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5993 5993 /*
5994 5994 * An unload is only necessary if the segment is connected. However,
5995 5995 * if the segment was on the import list in state RSM_STATE_CONNECTING
5996 5996 * then a connection was in progress. Change to RSM_STATE_NEW
5997 5997 * here to cause an early exit from the connection process.
5998 5998 */
5999 5999 if (im_seg->s_state == RSM_STATE_NEW) {
6000 6000 rsmseglock_release(im_seg);
6001 6001 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6002 6002 "rsmseg_unload done: RSM_STATE_NEW\n"));
6003 6003 return;
6004 6004 } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6005 6005 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6006 6006 rsmsharelock_acquire(im_seg);
6007 6007 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6008 6008 rsmsharelock_release(im_seg);
6009 6009 rsmseglock_release(im_seg);
6010 6010 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6011 6011 "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6012 6012 return;
6013 6013 }
6014 6014
6015 6015 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6016 6016 if (im_seg->s_ckl != NULL) {
6017 6017 int e;
6018 6018 /* Setup protections for remap */
6019 6019 maxprot = PROT_USER;
6020 6020 if (im_seg->s_mode & RSM_PERM_READ) {
6021 6021 maxprot |= PROT_READ;
6022 6022 }
6023 6023 if (im_seg->s_mode & RSM_PERM_WRITE) {
6024 6024 maxprot |= PROT_WRITE;
6025 6025 }
6026 6026 hdl = im_seg->s_ckl;
6027 6027 for (; hdl != NULL; hdl = hdl->c_next) {
6028 6028 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6029 6029 remap_cookie,
6030 6030 hdl->c_off, hdl->c_len,
6031 6031 maxprot, 0, NULL);
6032 6032
6033 6033 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6034 6034 "remap returns %d\n", e));
6035 6035 }
6036 6036 }
6037 6037
6038 6038 (void) rsm_closeconnection(im_seg, &shared_cookie);
6039 6039
6040 6040 if (shared_cookie != NULL) {
6041 6041 /*
6042 6042 * inform the exporting node so this import
6043 6043 * can be deleted from the list of importers.
6044 6044 */
6045 6045 request.rsmipc_hdr.rsmipc_type =
6046 6046 RSMIPC_MSG_NOTIMPORTING;
6047 6047 request.rsmipc_key = im_seg->s_segid;
6048 6048 request.rsmipc_segment_cookie = shared_cookie;
6049 6049 rsmseglock_release(im_seg);
6050 6050 (void) rsmipc_send(im_seg->s_node, &request,
6051 6051 RSM_NO_REPLY);
6052 6052 } else {
6053 6053 rsmseglock_release(im_seg);
6054 6054 }
6055 6055 }
6056 6056 else
6057 6057 rsmseglock_release(im_seg);
6058 6058
6059 6059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6060 6060
6061 6061 }
6062 6062
6063 6063 /* ****************************** Importer Calls ************************ */
6064 6064
6065 6065 static int
6066 6066 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6067 6067 {
6068 6068 int shifts = 0;
6069 6069
6070 6070 if (crgetuid(cr) != owner) {
6071 6071 shifts += 3;
6072 6072 if (!groupmember(group, cr))
6073 6073 shifts += 3;
6074 6074 }
6075 6075
6076 6076 mode &= ~(perm << shifts);
6077 6077
6078 6078 if (mode == 0)
6079 6079 return (0);
6080 6080
6081 6081 return (secpolicy_rsm_access(cr, owner, mode));
6082 6082 }
6083 6083
6084 6084
6085 6085 static int
6086 6086 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6087 6087 intptr_t dataptr, int mode)
6088 6088 {
6089 6089 int e;
6090 6090 int recheck_state = 0;
6091 6091 void *shared_cookie;
6092 6092 rsmipc_request_t request;
6093 6093 rsmipc_reply_t reply;
6094 6094 rsm_permission_t access;
6095 6095 adapter_t *adapter;
6096 6096 rsm_addr_t addr = 0;
6097 6097 rsm_import_share_t *sharedp;
6098 6098 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6099 6099
6100 6100 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6101 6101
6102 6102 adapter = rsm_getadapter(msg, mode);
6103 6103 if (adapter == NULL) {
6104 6104 DBG_PRINTF((category, RSM_ERR,
6105 6105 "rsm_connect done:ENODEV adapter=NULL\n"));
6106 6106 return (RSMERR_CTLR_NOT_PRESENT);
6107 6107 }
6108 6108
6109 6109 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6110 6110 rsmka_release_adapter(adapter);
6111 6111 DBG_PRINTF((category, RSM_ERR,
6112 6112 "rsm_connect done:ENODEV loopback\n"));
6113 6113 return (RSMERR_CTLR_NOT_PRESENT);
6114 6114 }
6115 6115
6116 6116
6117 6117 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6118 6118 ASSERT(seg->s_state == RSM_STATE_NEW);
6119 6119
6120 6120 /*
6121 6121 * Translate perm to access
6122 6122 */
6123 6123 if (msg->perm & ~RSM_PERM_RDWR) {
6124 6124 rsmka_release_adapter(adapter);
6125 6125 DBG_PRINTF((category, RSM_ERR,
6126 6126 "rsm_connect done:EINVAL invalid perms\n"));
6127 6127 return (RSMERR_BAD_PERMS);
6128 6128 }
6129 6129 access = 0;
6130 6130 if (msg->perm & RSM_PERM_READ)
6131 6131 access |= RSM_ACCESS_READ;
6132 6132 if (msg->perm & RSM_PERM_WRITE)
6133 6133 access |= RSM_ACCESS_WRITE;
6134 6134
6135 6135 seg->s_node = msg->nodeid;
6136 6136
6137 6137 /*
6138 6138 * Adding to the import list locks the segment; release the segment
6139 6139 * lock so we can get the reply for the send.
6140 6140 */
6141 6141 e = rsmimport_add(seg, msg->key);
6142 6142 if (e) {
6143 6143 rsmka_release_adapter(adapter);
6144 6144 DBG_PRINTF((category, RSM_ERR,
6145 6145 "rsm_connect done:rsmimport_add failed %d\n", e));
6146 6146 return (e);
6147 6147 }
6148 6148 seg->s_state = RSM_STATE_CONNECTING;
6149 6149
6150 6150 /*
6151 6151 * Set the s_adapter field here so as to have a valid comparison of
6152 6152 * the adapter and the s_adapter value during rsmshare_get. For
6153 6153 * any error, set s_adapter to NULL before doing a release_adapter
6154 6154 */
6155 6155 seg->s_adapter = adapter;
6156 6156
6157 6157 rsmseglock_release(seg);
6158 6158
6159 6159 /*
6160 6160 * get the pointer to the shared data structure; the
6161 6161 * shared data is locked and refcount has been incremented
6162 6162 */
6163 6163 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6164 6164
6165 6165 ASSERT(rsmsharelock_held(seg));
6166 6166
6167 6167 do {
6168 6168 /* flag indicates whether we need to recheck the state */
6169 6169 recheck_state = 0;
6170 6170 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6171 6171 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6172 6172 switch (sharedp->rsmsi_state) {
6173 6173 case RSMSI_STATE_NEW:
6174 6174 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6175 6175 break;
6176 6176 case RSMSI_STATE_CONNECTING:
6177 6177 /* FALLTHRU */
6178 6178 case RSMSI_STATE_CONN_QUIESCE:
6179 6179 /* FALLTHRU */
6180 6180 case RSMSI_STATE_MAP_QUIESCE:
6181 6181 /* wait for the state to change */
6182 6182 while ((sharedp->rsmsi_state ==
6183 6183 RSMSI_STATE_CONNECTING) ||
6184 6184 (sharedp->rsmsi_state ==
6185 6185 RSMSI_STATE_CONN_QUIESCE) ||
6186 6186 (sharedp->rsmsi_state ==
6187 6187 RSMSI_STATE_MAP_QUIESCE)) {
6188 6188 if (cv_wait_sig(&sharedp->rsmsi_cv,
6189 6189 &sharedp->rsmsi_lock) == 0) {
6190 6190 /* signalled - clean up and return */
6191 6191 rsmsharelock_release(seg);
6192 6192 rsmimport_rm(seg);
6193 6193 seg->s_adapter = NULL;
6194 6194 rsmka_release_adapter(adapter);
6195 6195 seg->s_state = RSM_STATE_NEW;
6196 6196 DBG_PRINTF((category, RSM_ERR,
6197 6197 "rsm_connect done: INTERRUPTED\n"));
6198 6198 return (RSMERR_INTERRUPTED);
6199 6199 }
6200 6200 }
6201 6201 /*
6202 6202 * the state changed, loop back and check what it is
6203 6203 */
6204 6204 recheck_state = 1;
6205 6205 break;
6206 6206 case RSMSI_STATE_ABORT_CONNECT:
6207 6207 /* exit the loop and clean up further down */
6208 6208 break;
6209 6209 case RSMSI_STATE_CONNECTED:
6210 6210 /* already connected, good - fall through */
6211 6211 case RSMSI_STATE_MAPPED:
6212 6212 /* already mapped, wow - fall through */
6213 6213 /* access validation etc is done further down */
6214 6214 break;
6215 6215 case RSMSI_STATE_DISCONNECTED:
6216 6216 /* disconnected - so reconnect now */
6217 6217 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6218 6218 break;
6219 6219 default:
6220 6220 ASSERT(0); /* Invalid State */
6221 6221 }
6222 6222 } while (recheck_state);
6223 6223
6224 6224 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6225 6225 /* we are the first to connect */
6226 6226 rsmsharelock_release(seg);
6227 6227
6228 6228 if (msg->nodeid != my_nodeid) {
6229 6229 addr = get_remote_hwaddr(adapter, msg->nodeid);
6230 6230
6231 6231 if ((int64_t)addr < 0) {
6232 6232 rsmsharelock_acquire(seg);
6233 6233 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6234 6234 RSMSI_STATE_NEW);
6235 6235 rsmsharelock_release(seg);
6236 6236 rsmimport_rm(seg);
6237 6237 seg->s_adapter = NULL;
6238 6238 rsmka_release_adapter(adapter);
6239 6239 seg->s_state = RSM_STATE_NEW;
6240 6240 DBG_PRINTF((category, RSM_ERR,
6241 6241 "rsm_connect done: hwaddr<0\n"));
6242 6242 return (RSMERR_INTERNAL_ERROR);
6243 6243 }
6244 6244 } else {
6245 6245 addr = adapter->hwaddr;
6246 6246 }
6247 6247
6248 6248 /*
6249 6249 * send request to node [src, dest, key, msgid] and get back
6250 6250 * [status, msgid, cookie]
6251 6251 */
6252 6252 request.rsmipc_key = msg->key;
6253 6253 /*
6254 6254 * we need the s_mode of the exporter so pass
6255 6255 * RSM_ACCESS_TRUSTED
6256 6256 */
6257 6257 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6258 6258 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6259 6259 request.rsmipc_adapter_hwaddr = addr;
6260 6260 request.rsmipc_segment_cookie = sharedp;
6261 6261
6262 6262 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6263 6263 if (e) {
6264 6264 rsmsharelock_acquire(seg);
6265 6265 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6266 6266 RSMSI_STATE_NEW);
6267 6267 rsmsharelock_release(seg);
6268 6268 rsmimport_rm(seg);
6269 6269 seg->s_adapter = NULL;
6270 6270 rsmka_release_adapter(adapter);
6271 6271 seg->s_state = RSM_STATE_NEW;
6272 6272 DBG_PRINTF((category, RSM_ERR,
6273 6273 "rsm_connect done:rsmipc_send failed %d\n", e));
6274 6274 return (e);
6275 6275 }
6276 6276
6277 6277 if (reply.rsmipc_status != RSM_SUCCESS) {
6278 6278 rsmsharelock_acquire(seg);
6279 6279 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6280 6280 RSMSI_STATE_NEW);
6281 6281 rsmsharelock_release(seg);
6282 6282 rsmimport_rm(seg);
6283 6283 seg->s_adapter = NULL;
6284 6284 rsmka_release_adapter(adapter);
6285 6285 seg->s_state = RSM_STATE_NEW;
6286 6286 DBG_PRINTF((category, RSM_ERR,
6287 6287 "rsm_connect done:rsmipc_send reply err %d\n",
6288 6288 reply.rsmipc_status));
6289 6289 return (reply.rsmipc_status);
6290 6290 }
6291 6291
6292 6292 rsmsharelock_acquire(seg);
6293 6293 /* store the information recvd into the shared data struct */
6294 6294 sharedp->rsmsi_mode = reply.rsmipc_mode;
6295 6295 sharedp->rsmsi_uid = reply.rsmipc_uid;
6296 6296 sharedp->rsmsi_gid = reply.rsmipc_gid;
6297 6297 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6298 6298 sharedp->rsmsi_cookie = sharedp;
6299 6299 }
6300 6300
6301 6301 rsmsharelock_release(seg);
6302 6302
6303 6303 /*
6304 6304 * Get the segment lock and check for a force disconnect
6305 6305 * from the export side which would have changed the state
6306 6306 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6307 6307 * force disconnect will be held off until the connection
6308 6308 * has completed.
6309 6309 */
6310 6310 rsmseglock_acquire(seg);
6311 6311 rsmsharelock_acquire(seg);
6312 6312 ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6313 6313 seg->s_state == RSM_STATE_ABORT_CONNECT);
6314 6314
6315 6315 shared_cookie = sharedp->rsmsi_cookie;
6316 6316
6317 6317 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6318 6318 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6319 6319 seg->s_state = RSM_STATE_NEW;
6320 6320 seg->s_adapter = NULL;
6321 6321 rsmsharelock_release(seg);
6322 6322 rsmseglock_release(seg);
6323 6323 rsmimport_rm(seg);
6324 6324 rsmka_release_adapter(adapter);
6325 6325
6326 6326 rsmsharelock_acquire(seg);
6327 6327 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6328 6328 /*
6329 6329 * set a flag indicating abort handling has been
6330 6330 * done
6331 6331 */
6332 6332 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6333 6333 rsmsharelock_release(seg);
6334 6334 /* send a message to exporter - only once */
6335 6335 (void) rsm_send_notimporting(msg->nodeid,
6336 6336 msg->key, shared_cookie);
6337 6337 rsmsharelock_acquire(seg);
6338 6338 /*
6339 6339 * wake up any waiting importers and inform that
6340 6340 * connection has been aborted
6341 6341 */
6342 6342 cv_broadcast(&sharedp->rsmsi_cv);
6343 6343 }
6344 6344 rsmsharelock_release(seg);
6345 6345
6346 6346 DBG_PRINTF((category, RSM_ERR,
6347 6347 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6348 6348 return (RSMERR_INTERRUPTED);
6349 6349 }
6350 6350
6351 6351
6352 6352 /*
6353 6353 * We need to verify that this process has access
6354 6354 */
6355 6355 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6356 6356 access & sharedp->rsmsi_mode,
6357 6357 (int)(msg->perm & RSM_PERM_RDWR), cred);
6358 6358 if (e) {
6359 6359 rsmsharelock_release(seg);
6360 6360 seg->s_state = RSM_STATE_NEW;
6361 6361 seg->s_adapter = NULL;
6362 6362 rsmseglock_release(seg);
6363 6363 rsmimport_rm(seg);
6364 6364 rsmka_release_adapter(adapter);
6365 6365 /*
6366 6366 * No need to lock segment it has been removed
6367 6367 * from the hash table
6368 6368 */
6369 6369 rsmsharelock_acquire(seg);
6370 6370 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6371 6371 rsmsharelock_release(seg);
6372 6372 /* this is the first importer */
6373 6373
6374 6374 (void) rsm_send_notimporting(msg->nodeid, msg->key,
6375 6375 shared_cookie);
6376 6376 rsmsharelock_acquire(seg);
6377 6377 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6378 6378 cv_broadcast(&sharedp->rsmsi_cv);
6379 6379 }
6380 6380 rsmsharelock_release(seg);
6381 6381
6382 6382 DBG_PRINTF((category, RSM_ERR,
6383 6383 "rsm_connect done: ipcaccess failed\n"));
6384 6384 return (RSMERR_PERM_DENIED);
6385 6385 }
6386 6386
6387 6387 /* update state and cookie */
6388 6388 seg->s_segid = sharedp->rsmsi_segid;
6389 6389 seg->s_len = sharedp->rsmsi_seglen;
6390 6390 seg->s_mode = access & sharedp->rsmsi_mode;
6391 6391 seg->s_pid = ddi_get_pid();
6392 6392 seg->s_mapinfo = NULL;
6393 6393
6394 6394 if (seg->s_node != my_nodeid) {
6395 6395 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6396 6396 e = adapter->rsmpi_ops->rsm_connect(
6397 6397 adapter->rsmpi_handle,
6398 6398 addr, seg->s_segid, &sharedp->rsmsi_handle);
6399 6399
6400 6400 if (e != RSM_SUCCESS) {
6401 6401 seg->s_state = RSM_STATE_NEW;
6402 6402 seg->s_adapter = NULL;
6403 6403 rsmsharelock_release(seg);
6404 6404 rsmseglock_release(seg);
6405 6405 rsmimport_rm(seg);
6406 6406 rsmka_release_adapter(adapter);
6407 6407 /*
6408 6408 * inform the exporter to delete this importer
6409 6409 */
6410 6410 (void) rsm_send_notimporting(msg->nodeid,
6411 6411 msg->key, shared_cookie);
6412 6412
6413 6413 /*
6414 6414 * Now inform any waiting importers to
6415 6415 * retry connect. This needs to be done
6416 6416 * after sending notimporting so that
6417 6417 * the notimporting is sent before a waiting
6418 6418 * importer sends a segconnect while retrying
6419 6419 *
6420 6420 * No need to lock segment it has been removed
6421 6421 * from the hash table
6422 6422 */
6423 6423
6424 6424 rsmsharelock_acquire(seg);
6425 6425 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6426 6426 cv_broadcast(&sharedp->rsmsi_cv);
6427 6427 rsmsharelock_release(seg);
6428 6428
6429 6429 DBG_PRINTF((category, RSM_ERR,
6430 6430 "rsm_connect error %d\n", e));
6431 6431 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6432 6432 return (
6433 6433 RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6434 6434 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6435 6435 (e == RSMERR_UNKNOWN_RSM_ADDR))
6436 6436 return (RSMERR_REMOTE_NODE_UNREACHABLE);
6437 6437 else
6438 6438 return (e);
6439 6439 }
6440 6440
6441 6441 }
↓ open down ↓ |
493 lines elided |
↑ open up ↑ |
6442 6442 seg->s_handle.in = sharedp->rsmsi_handle;
6443 6443
6444 6444 }
6445 6445
6446 6446 seg->s_state = RSM_STATE_CONNECT;
6447 6447
6448 6448
6449 6449 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */
6450 6450 if (bar_va) {
6451 6451 /* increment generation number on barrier page */
6452 - atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6452 + atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6453 6453 /* return user off into barrier page where status will be */
6454 6454 msg->off = (int)seg->s_hdr.rsmrc_num;
6455 6455 msg->gnum = bar_va[msg->off]; /* gnum race */
6456 6456 } else {
6457 6457 msg->off = 0;
6458 6458 msg->gnum = 0; /* gnum race */
6459 6459 }
6460 6460
6461 6461 msg->len = (int)sharedp->rsmsi_seglen;
6462 6462 msg->rnum = seg->s_minor;
6463 6463 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6464 6464 rsmsharelock_release(seg);
6465 6465 rsmseglock_release(seg);
6466 6466
6467 6467 /* Return back to user the segment size & perm in case it's needed */
6468 6468
6469 6469 #ifdef _MULTI_DATAMODEL
6470 6470 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6471 6471 rsm_ioctlmsg32_t msg32;
6472 6472
6473 6473 if (msg->len > UINT_MAX)
6474 6474 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6475 6475 else
6476 6476 msg32.len = msg->len;
6477 6477 msg32.off = msg->off;
6478 6478 msg32.perm = msg->perm;
6479 6479 msg32.gnum = msg->gnum;
6480 6480 msg32.rnum = msg->rnum;
6481 6481
6482 6482 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6483 6483 "rsm_connect done\n"));
6484 6484
6485 6485 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6486 6486 sizeof (msg32), mode))
6487 6487 return (RSMERR_BAD_ADDR);
6488 6488 else
6489 6489 return (RSM_SUCCESS);
6490 6490 }
6491 6491 #endif
6492 6492 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6493 6493
6494 6494 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6495 6495 mode))
6496 6496 return (RSMERR_BAD_ADDR);
6497 6497 else
6498 6498 return (RSM_SUCCESS);
6499 6499 }
6500 6500
6501 6501 static int
6502 6502 rsm_unmap(rsmseg_t *seg)
6503 6503 {
6504 6504 int err;
6505 6505 adapter_t *adapter;
6506 6506 rsm_import_share_t *sharedp;
6507 6507 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6508 6508
6509 6509 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6510 6510 "rsm_unmap enter %u\n", seg->s_segid));
6511 6511
6512 6512 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6513 6513
6514 6514 /* assert seg is locked */
6515 6515 ASSERT(rsmseglock_held(seg));
6516 6516 ASSERT(seg->s_state != RSM_STATE_MAPPING);
6517 6517
6518 6518 if ((seg->s_state != RSM_STATE_ACTIVE) &&
6519 6519 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6520 6520 /* segment unmap has already been done */
6521 6521 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6522 6522 return (RSM_SUCCESS);
6523 6523 }
6524 6524
6525 6525 sharedp = seg->s_share;
6526 6526
6527 6527 rsmsharelock_acquire(seg);
6528 6528
6529 6529 /*
6530 6530 * - shared data struct is in MAPPED or MAP_QUIESCE state
6531 6531 */
6532 6532
6533 6533 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6534 6534 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6535 6535
6536 6536 /*
6537 6537 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6538 6538 * the segment cookie list was NULL; but it is always NULL when
6539 6539 * called from rsmmap_unmap and won't be NULL when called for
6540 6540 * a force disconnect - so the check for NULL cookie list was removed
6541 6541 */
6542 6542
6543 6543 ASSERT(sharedp->rsmsi_mapcnt > 0);
6544 6544
6545 6545 sharedp->rsmsi_mapcnt--;
6546 6546
6547 6547 if (sharedp->rsmsi_mapcnt == 0) {
6548 6548 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6549 6549 /* unmap the shared RSMPI mapping */
6550 6550 adapter = seg->s_adapter;
6551 6551 if (seg->s_node != my_nodeid) {
6552 6552 ASSERT(sharedp->rsmsi_handle != NULL);
6553 6553 err = adapter->rsmpi_ops->
6554 6554 rsm_unmap(sharedp->rsmsi_handle);
6555 6555 DBG_PRINTF((category, RSM_DEBUG,
6556 6556 "rsm_unmap: rsmpi unmap %d\n", err));
6557 6557 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6558 6558 sharedp->rsmsi_mapinfo = NULL;
6559 6559 }
6560 6560 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6561 6561 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6562 6562 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6563 6563 }
6564 6564 }
6565 6565
6566 6566 rsmsharelock_release(seg);
6567 6567
6568 6568 /*
6569 6569 * The s_cookie field is used to store the cookie returned from the
6570 6570 * ddi_umem_lock when binding the pages for an export segment. This
6571 6571 * is the primary use of the s_cookie field and does not normally
6572 6572 * pertain to any importing segment except in the loopback case.
6573 6573 * For the loopback case, the import segment and export segment are
6574 6574 * on the same node, the s_cookie field of the segment structure for
6575 6575 * the importer is initialized to the s_cookie field in the exported
6576 6576 * segment during the map operation and is used during the call to
6577 6577 * devmap_umem_setup for the import mapping.
6578 6578 * Thus, during unmap, we simply need to set s_cookie to NULL to
6579 6579 * indicate that the mapping no longer exists.
6580 6580 */
6581 6581 seg->s_cookie = NULL;
6582 6582
6583 6583 seg->s_mapinfo = NULL;
6584 6584
6585 6585 if (seg->s_state == RSM_STATE_ACTIVE)
6586 6586 seg->s_state = RSM_STATE_CONNECT;
6587 6587 else
6588 6588 seg->s_state = RSM_STATE_CONN_QUIESCE;
6589 6589
6590 6590 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6591 6591
6592 6592 return (RSM_SUCCESS);
6593 6593 }
6594 6594
6595 6595 /*
6596 6596 * cookie returned here if not null indicates that it is
6597 6597 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6598 6598 * message.
6599 6599 */
6600 6600 static int
6601 6601 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6602 6602 {
6603 6603 int e;
6604 6604 adapter_t *adapter;
6605 6605 rsm_import_share_t *sharedp;
6606 6606 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6607 6607
6608 6608 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6609 6609 "rsm_closeconnection enter\n"));
6610 6610
6611 6611 *cookie = (void *)NULL;
6612 6612
6613 6613 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6614 6614
6615 6615 /* assert seg is locked */
6616 6616 ASSERT(rsmseglock_held(seg));
6617 6617
6618 6618 if (seg->s_state == RSM_STATE_DISCONNECT) {
6619 6619 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6620 6620 "rsm_closeconnection done: already disconnected\n"));
6621 6621 return (RSM_SUCCESS);
6622 6622 }
6623 6623
6624 6624 /* wait for all putv/getv ops to get done */
6625 6625 while (seg->s_rdmacnt > 0) {
6626 6626 cv_wait(&seg->s_cv, &seg->s_lock);
6627 6627 }
6628 6628
6629 6629 (void) rsm_unmap(seg);
6630 6630
6631 6631 ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6632 6632 seg->s_state == RSM_STATE_CONN_QUIESCE);
6633 6633
6634 6634 adapter = seg->s_adapter;
6635 6635 sharedp = seg->s_share;
6636 6636
6637 6637 ASSERT(sharedp != NULL);
6638 6638
6639 6639 rsmsharelock_acquire(seg);
6640 6640
6641 6641 /*
6642 6642 * Disconnect on adapter
6643 6643 *
6644 6644 * The current algorithm is stateless, I don't have to contact
6645 6645 * server when I go away. He only gives me permissions. Of course,
6646 6646 * the adapters will talk to terminate the connect.
6647 6647 *
6648 6648 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6649 6649 */
6650 6650 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6651 6651 (sharedp->rsmsi_node != my_nodeid)) {
6652 6652
6653 6653 if (sharedp->rsmsi_refcnt == 1) {
6654 6654 /* this is the last importer */
6655 6655 ASSERT(sharedp->rsmsi_mapcnt == 0);
6656 6656
6657 6657 e = adapter->rsmpi_ops->
6658 6658 rsm_disconnect(sharedp->rsmsi_handle);
6659 6659 if (e != RSM_SUCCESS) {
6660 6660 DBG_PRINTF((category, RSM_DEBUG,
6661 6661 "rsm:disconnect failed seg=%x:err=%d\n",
6662 6662 seg->s_key, e));
6663 6663 }
6664 6664 }
6665 6665 }
6666 6666
6667 6667 seg->s_handle.in = NULL;
6668 6668
6669 6669 sharedp->rsmsi_refcnt--;
6670 6670
6671 6671 if (sharedp->rsmsi_refcnt == 0) {
6672 6672 *cookie = (void *)sharedp->rsmsi_cookie;
6673 6673 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6674 6674 sharedp->rsmsi_handle = NULL;
6675 6675 rsmsharelock_release(seg);
6676 6676
6677 6677 /* clean up the shared data structure */
↓ open down ↓ |
215 lines elided |
↑ open up ↑ |
6678 6678 mutex_destroy(&sharedp->rsmsi_lock);
6679 6679 cv_destroy(&sharedp->rsmsi_cv);
6680 6680 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6681 6681
6682 6682 } else {
6683 6683 rsmsharelock_release(seg);
6684 6684 }
6685 6685
6686 6686 /* increment generation number on barrier page */
6687 6687 if (bar_va) {
6688 - atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6688 + atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6689 6689 }
6690 6690
6691 6691 /*
6692 6692 * The following needs to be done after any
6693 6693 * rsmsharelock calls which use seg->s_share.
6694 6694 */
6695 6695 seg->s_share = NULL;
6696 6696
6697 6697 seg->s_state = RSM_STATE_DISCONNECT;
6698 6698 /* signal anyone waiting in the CONN_QUIESCE state */
6699 6699 cv_broadcast(&seg->s_cv);
6700 6700
6701 6701 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6702 6702 "rsm_closeconnection done\n"));
6703 6703
6704 6704 return (RSM_SUCCESS);
6705 6705 }
6706 6706
6707 6707 int
6708 6708 rsm_disconnect(rsmseg_t *seg)
6709 6709 {
6710 6710 rsmipc_request_t request;
6711 6711 void *shared_cookie;
6712 6712 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6713 6713
6714 6714 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6715 6715
6716 6716 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6717 6717
6718 6718 /* assert seg isn't locked */
6719 6719 ASSERT(!rsmseglock_held(seg));
6720 6720
6721 6721
6722 6722 /* Remove segment from imported list */
6723 6723 rsmimport_rm(seg);
6724 6724
6725 6725 /* acquire the segment */
6726 6726 rsmseglock_acquire(seg);
6727 6727
6728 6728 /* wait until segment leaves the mapping state */
6729 6729 while (seg->s_state == RSM_STATE_MAPPING)
6730 6730 cv_wait(&seg->s_cv, &seg->s_lock);
6731 6731
6732 6732 if (seg->s_state == RSM_STATE_DISCONNECT) {
6733 6733 seg->s_state = RSM_STATE_NEW;
6734 6734 rsmseglock_release(seg);
6735 6735 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6736 6736 "rsm_disconnect done: already disconnected\n"));
6737 6737 return (RSM_SUCCESS);
6738 6738 }
6739 6739
6740 6740 (void) rsm_closeconnection(seg, &shared_cookie);
6741 6741
6742 6742 /* update state */
6743 6743 seg->s_state = RSM_STATE_NEW;
6744 6744
6745 6745 if (shared_cookie != NULL) {
6746 6746 /*
6747 6747 * This is the last importer so inform the exporting node
6748 6748 * so this import can be deleted from the list of importers.
6749 6749 */
6750 6750 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6751 6751 request.rsmipc_key = seg->s_segid;
6752 6752 request.rsmipc_segment_cookie = shared_cookie;
6753 6753 rsmseglock_release(seg);
6754 6754 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6755 6755 } else {
6756 6756 rsmseglock_release(seg);
6757 6757 }
6758 6758
6759 6759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6760 6760
6761 6761 return (DDI_SUCCESS);
6762 6762 }
6763 6763
6764 6764 /*ARGSUSED*/
6765 6765 static int
6766 6766 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6767 6767 struct pollhead **phpp)
6768 6768 {
6769 6769 minor_t rnum;
6770 6770 rsmresource_t *res;
6771 6771 rsmseg_t *seg;
6772 6772 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6773 6773
6774 6774 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6775 6775
6776 6776 /* find minor, no lock */
6777 6777 rnum = getminor(dev);
6778 6778 res = rsmresource_lookup(rnum, RSM_NOLOCK);
6779 6779
6780 6780 /* poll is supported only for export/import segments */
6781 6781 if ((res == NULL) || (res == RSMRC_RESERVED) ||
6782 6782 (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6783 6783 return (ENXIO);
6784 6784 }
6785 6785
6786 6786 *reventsp = 0;
6787 6787
6788 6788 /*
6789 6789 * An exported segment must be in state RSM_STATE_EXPORT; an
6790 6790 * imported segment must be in state RSM_STATE_ACTIVE.
6791 6791 */
6792 6792 seg = (rsmseg_t *)res;
6793 6793
6794 6794 if (seg->s_pollevent) {
6795 6795 *reventsp = POLLRDNORM;
6796 6796 } else if (!anyyet) {
6797 6797 /* cannot take segment lock here */
6798 6798 *phpp = &seg->s_poll;
6799 6799 seg->s_pollflag |= RSM_SEGMENT_POLL;
6800 6800 }
6801 6801 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6802 6802 return (0);
6803 6803 }
6804 6804
6805 6805
6806 6806
6807 6807 /* ************************* IOCTL Commands ********************* */
6808 6808
6809 6809 static rsmseg_t *
6810 6810 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6811 6811 rsm_resource_type_t type)
6812 6812 {
6813 6813 /* get segment from resource handle */
6814 6814 rsmseg_t *seg;
6815 6815 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6816 6816
6817 6817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6818 6818
6819 6819
6820 6820 if (res != RSMRC_RESERVED) {
6821 6821 seg = (rsmseg_t *)res;
6822 6822 } else {
6823 6823 /* Allocate segment now and bind it */
6824 6824 seg = rsmseg_alloc(rnum, credp);
6825 6825
6826 6826 /*
6827 6827 * if DR pre-processing is going on or DR is in progress
6828 6828 * then the new export segments should be in the NEW_QSCD state
6829 6829 */
6830 6830 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6831 6831 mutex_enter(&rsm_drv_data.drv_lock);
6832 6832 if ((rsm_drv_data.drv_state ==
6833 6833 RSM_DRV_PREDEL_STARTED) ||
6834 6834 (rsm_drv_data.drv_state ==
6835 6835 RSM_DRV_PREDEL_COMPLETED) ||
6836 6836 (rsm_drv_data.drv_state ==
6837 6837 RSM_DRV_DR_IN_PROGRESS)) {
6838 6838 seg->s_state = RSM_STATE_NEW_QUIESCED;
6839 6839 }
6840 6840 mutex_exit(&rsm_drv_data.drv_lock);
6841 6841 }
6842 6842
6843 6843 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6844 6844 }
6845 6845
6846 6846 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6847 6847
6848 6848 return (seg);
6849 6849 }
6850 6850
6851 6851 static int
6852 6852 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6853 6853 int mode, cred_t *credp)
6854 6854 {
6855 6855 int error;
6856 6856 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6857 6857
6858 6858 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6859 6859
6860 6860 arg = arg;
6861 6861 credp = credp;
6862 6862
6863 6863 ASSERT(seg != NULL);
6864 6864
6865 6865 switch (cmd) {
6866 6866 case RSM_IOCTL_BIND:
6867 6867 error = rsm_bind(seg, msg, arg, mode);
6868 6868 break;
6869 6869 case RSM_IOCTL_REBIND:
6870 6870 error = rsm_rebind(seg, msg);
6871 6871 break;
6872 6872 case RSM_IOCTL_UNBIND:
6873 6873 error = ENOTSUP;
6874 6874 break;
6875 6875 case RSM_IOCTL_PUBLISH:
6876 6876 error = rsm_publish(seg, msg, arg, mode);
6877 6877 break;
6878 6878 case RSM_IOCTL_REPUBLISH:
6879 6879 error = rsm_republish(seg, msg, mode);
6880 6880 break;
6881 6881 case RSM_IOCTL_UNPUBLISH:
6882 6882 error = rsm_unpublish(seg, 1);
6883 6883 break;
6884 6884 default:
6885 6885 error = EINVAL;
6886 6886 break;
6887 6887 }
6888 6888
6889 6889 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6890 6890 error));
6891 6891
6892 6892 return (error);
6893 6893 }
6894 6894 static int
6895 6895 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6896 6896 int mode, cred_t *credp)
6897 6897 {
6898 6898 int error;
6899 6899 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6900 6900
6901 6901 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6902 6902
6903 6903 ASSERT(seg);
6904 6904
6905 6905 switch (cmd) {
6906 6906 case RSM_IOCTL_CONNECT:
6907 6907 error = rsm_connect(seg, msg, credp, arg, mode);
6908 6908 break;
6909 6909 default:
6910 6910 error = EINVAL;
6911 6911 break;
6912 6912 }
6913 6913
6914 6914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6915 6915 error));
6916 6916 return (error);
6917 6917 }
6918 6918
6919 6919 static int
6920 6920 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6921 6921 int mode)
6922 6922 {
6923 6923 int e;
6924 6924 adapter_t *adapter;
6925 6925 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6926 6926
6927 6927 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6928 6928
6929 6929
6930 6930 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6931 6931 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6932 6932 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6933 6933 return (RSMERR_CONN_ABORTED);
6934 6934 } else if (seg->s_node == my_nodeid) {
6935 6935 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6936 6936 "rsmbar_ioctl done: loopback\n"));
6937 6937 return (RSM_SUCCESS);
6938 6938 }
6939 6939
6940 6940 adapter = seg->s_adapter;
6941 6941
6942 6942 switch (cmd) {
6943 6943 case RSM_IOCTL_BAR_CHECK:
6944 6944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6945 6945 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6946 6946 return (bar_va ? RSM_SUCCESS : EINVAL);
6947 6947 case RSM_IOCTL_BAR_OPEN:
6948 6948 e = adapter->rsmpi_ops->
6949 6949 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6950 6950 break;
6951 6951 case RSM_IOCTL_BAR_ORDER:
6952 6952 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6953 6953 break;
6954 6954 case RSM_IOCTL_BAR_CLOSE:
6955 6955 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6956 6956 break;
6957 6957 default:
6958 6958 e = EINVAL;
6959 6959 break;
6960 6960 }
6961 6961
6962 6962 if (e == RSM_SUCCESS) {
6963 6963 #ifdef _MULTI_DATAMODEL
6964 6964 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6965 6965 rsm_ioctlmsg32_t msg32;
6966 6966 int i;
6967 6967
6968 6968 for (i = 0; i < 4; i++) {
6969 6969 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6970 6970 }
6971 6971
6972 6972 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6973 6973 "rsmbar_ioctl done\n"));
6974 6974 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6975 6975 sizeof (msg32), mode))
6976 6976 return (RSMERR_BAD_ADDR);
6977 6977 else
6978 6978 return (RSM_SUCCESS);
6979 6979 }
6980 6980 #endif
6981 6981 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6982 6982 "rsmbar_ioctl done\n"));
6983 6983 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6984 6984 sizeof (*msg), mode))
6985 6985 return (RSMERR_BAD_ADDR);
6986 6986 else
6987 6987 return (RSM_SUCCESS);
6988 6988 }
6989 6989
6990 6990 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6991 6991 "rsmbar_ioctl done: error=%d\n", e));
6992 6992
6993 6993 return (e);
6994 6994 }
6995 6995
6996 6996 /*
6997 6997 * Ring the doorbell of the export segment to which this segment is
6998 6998 * connected.
6999 6999 */
7000 7000 static int
7001 7001 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7002 7002 {
7003 7003 int e = 0;
7004 7004 rsmipc_request_t request;
7005 7005
7006 7006 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7007 7007
7008 7008 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7009 7009
7010 7010 request.rsmipc_key = seg->s_segid;
7011 7011 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7012 7012 request.rsmipc_segment_cookie = NULL;
7013 7013 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7014 7014
7015 7015 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7016 7016 "exportbell_ioctl done: %d\n", e));
7017 7017
7018 7018 return (e);
7019 7019 }
7020 7020
7021 7021 /*
7022 7022 * Ring the doorbells of all segments importing this segment
7023 7023 */
7024 7024 static int
7025 7025 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7026 7026 {
7027 7027 importing_token_t *token = NULL;
7028 7028 rsmipc_request_t request;
7029 7029 int index;
7030 7030
7031 7031 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7032 7032
7033 7033 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7034 7034
7035 7035 ASSERT(seg->s_state != RSM_STATE_NEW &&
7036 7036 seg->s_state != RSM_STATE_NEW_QUIESCED);
7037 7037
7038 7038 request.rsmipc_key = seg->s_segid;
7039 7039 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7040 7040
7041 7041 index = rsmhash(seg->s_segid);
7042 7042
7043 7043 token = importer_list.bucket[index];
7044 7044
7045 7045 while (token != NULL) {
7046 7046 if (seg->s_key == token->key) {
7047 7047 request.rsmipc_segment_cookie =
7048 7048 token->import_segment_cookie;
7049 7049 (void) rsmipc_send(token->importing_node,
7050 7050 &request, RSM_NO_REPLY);
7051 7051 }
7052 7052 token = token->next;
7053 7053 }
7054 7054
7055 7055 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7056 7056 "importbell_ioctl done\n"));
7057 7057 return (RSM_SUCCESS);
7058 7058 }
7059 7059
7060 7060 static int
7061 7061 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7062 7062 rsm_poll_event_t **eventspp, int mode)
7063 7063 {
7064 7064 rsm_poll_event_t *evlist = NULL;
7065 7065 size_t evlistsz;
7066 7066 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7067 7067
7068 7068 #ifdef _MULTI_DATAMODEL
7069 7069 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7070 7070 int i;
7071 7071 rsm_consume_event_msg32_t cemsg32 = {0};
7072 7072 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7073 7073 rsm_poll_event32_t *evlist32;
7074 7074 size_t evlistsz32;
7075 7075
7076 7076 /* copyin the ioctl message */
7077 7077 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7078 7078 sizeof (rsm_consume_event_msg32_t), mode)) {
7079 7079 DBG_PRINTF((category, RSM_ERR,
7080 7080 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7081 7081 return (RSMERR_BAD_ADDR);
7082 7082 }
7083 7083 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7084 7084 msgp->numents = (int)cemsg32.numents;
7085 7085
7086 7086 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7087 7087 /*
7088 7088 * If numents is large alloc events list on heap otherwise
7089 7089 * use the address of array that was passed in.
7090 7090 */
7091 7091 if (msgp->numents > RSM_MAX_POLLFDS) {
7092 7092 if (msgp->numents > max_segs) { /* validate numents */
7093 7093 DBG_PRINTF((category, RSM_ERR,
7094 7094 "consumeevent_copyin: "
7095 7095 "RSMERR_BAD_ARGS_ERRORS\n"));
7096 7096 return (RSMERR_BAD_ARGS_ERRORS);
7097 7097 }
7098 7098 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7099 7099 } else {
7100 7100 evlist32 = event32;
7101 7101 }
7102 7102
7103 7103 /* copyin the seglist into the rsm_poll_event32_t array */
7104 7104 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7105 7105 evlistsz32, mode)) {
7106 7106 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7107 7107 kmem_free(evlist32, evlistsz32);
7108 7108 }
7109 7109 DBG_PRINTF((category, RSM_ERR,
7110 7110 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7111 7111 return (RSMERR_BAD_ADDR);
7112 7112 }
7113 7113
7114 7114 /* evlist and evlistsz are based on rsm_poll_event_t type */
7115 7115 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7116 7116
7117 7117 if (msgp->numents > RSM_MAX_POLLFDS) {
7118 7118 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7119 7119 *eventspp = evlist;
7120 7120 } else {
7121 7121 evlist = *eventspp;
7122 7122 }
7123 7123 /*
7124 7124 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7125 7125 * array
7126 7126 */
7127 7127 for (i = 0; i < msgp->numents; i++) {
7128 7128 evlist[i].rnum = evlist32[i].rnum;
7129 7129 evlist[i].fdsidx = evlist32[i].fdsidx;
7130 7130 evlist[i].revent = evlist32[i].revent;
7131 7131 }
7132 7132 /* free the temp 32-bit event list */
7133 7133 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7134 7134 kmem_free(evlist32, evlistsz32);
7135 7135 }
7136 7136
7137 7137 return (RSM_SUCCESS);
7138 7138 }
7139 7139 #endif
7140 7140 /* copyin the ioctl message */
7141 7141 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7142 7142 mode)) {
7143 7143 DBG_PRINTF((category, RSM_ERR,
7144 7144 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7145 7145 return (RSMERR_BAD_ADDR);
7146 7146 }
7147 7147 /*
7148 7148 * If numents is large alloc events list on heap otherwise
7149 7149 * use the address of array that was passed in.
7150 7150 */
7151 7151 if (msgp->numents > RSM_MAX_POLLFDS) {
7152 7152 if (msgp->numents > max_segs) { /* validate numents */
7153 7153 DBG_PRINTF((category, RSM_ERR,
7154 7154 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7155 7155 return (RSMERR_BAD_ARGS_ERRORS);
7156 7156 }
7157 7157 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7158 7158 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7159 7159 *eventspp = evlist;
7160 7160 }
7161 7161
7162 7162 /* copyin the seglist */
7163 7163 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7164 7164 sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7165 7165 if (evlist) {
7166 7166 kmem_free(evlist, evlistsz);
7167 7167 *eventspp = NULL;
7168 7168 }
7169 7169 DBG_PRINTF((category, RSM_ERR,
7170 7170 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7171 7171 return (RSMERR_BAD_ADDR);
7172 7172 }
7173 7173
7174 7174 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7175 7175 "consumeevent_copyin done\n"));
7176 7176 return (RSM_SUCCESS);
7177 7177 }
7178 7178
7179 7179 static int
7180 7180 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7181 7181 rsm_poll_event_t *eventsp, int mode)
7182 7182 {
7183 7183 size_t evlistsz;
7184 7184 int err = RSM_SUCCESS;
7185 7185 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7186 7186
7187 7187 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7188 7188 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7189 7189 msgp->numents, eventsp));
7190 7190
7191 7191 #ifdef _MULTI_DATAMODEL
7192 7192 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7193 7193 int i;
7194 7194 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7195 7195 rsm_poll_event32_t *evlist32;
7196 7196 size_t evlistsz32;
7197 7197
7198 7198 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7199 7199 if (msgp->numents > RSM_MAX_POLLFDS) {
7200 7200 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7201 7201 } else {
7202 7202 evlist32 = event32;
7203 7203 }
7204 7204
7205 7205 /*
7206 7206 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7207 7207 * array
7208 7208 */
7209 7209 for (i = 0; i < msgp->numents; i++) {
7210 7210 evlist32[i].rnum = eventsp[i].rnum;
7211 7211 evlist32[i].fdsidx = eventsp[i].fdsidx;
7212 7212 evlist32[i].revent = eventsp[i].revent;
7213 7213 }
7214 7214
7215 7215 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7216 7216 evlistsz32, mode)) {
7217 7217 err = RSMERR_BAD_ADDR;
7218 7218 }
7219 7219
7220 7220 if (msgp->numents > RSM_MAX_POLLFDS) {
7221 7221 if (evlist32) { /* free the temp 32-bit event list */
7222 7222 kmem_free(evlist32, evlistsz32);
7223 7223 }
7224 7224 /*
7225 7225 * eventsp and evlistsz are based on rsm_poll_event_t
7226 7226 * type
7227 7227 */
7228 7228 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7229 7229 /* event list on the heap and needs to be freed here */
7230 7230 if (eventsp) {
7231 7231 kmem_free(eventsp, evlistsz);
7232 7232 }
7233 7233 }
7234 7234
7235 7235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7236 7236 "consumeevent_copyout done: err=%d\n", err));
7237 7237 return (err);
7238 7238 }
7239 7239 #endif
7240 7240 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7241 7241
7242 7242 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7243 7243 mode)) {
7244 7244 err = RSMERR_BAD_ADDR;
7245 7245 }
7246 7246
7247 7247 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7248 7248 /* event list on the heap and needs to be freed here */
7249 7249 kmem_free(eventsp, evlistsz);
7250 7250 }
7251 7251
7252 7252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7253 7253 "consumeevent_copyout done: err=%d\n", err));
7254 7254 return (err);
7255 7255 }
7256 7256
7257 7257 static int
7258 7258 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7259 7259 {
7260 7260 int rc;
7261 7261 int i;
7262 7262 minor_t rnum;
7263 7263 rsm_consume_event_msg_t msg = {0};
7264 7264 rsmseg_t *seg;
7265 7265 rsm_poll_event_t *event_list;
7266 7266 rsm_poll_event_t events[RSM_MAX_POLLFDS];
7267 7267 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7268 7268
7269 7269 event_list = events;
7270 7270
7271 7271 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7272 7272 RSM_SUCCESS) {
7273 7273 return (rc);
7274 7274 }
7275 7275
7276 7276 for (i = 0; i < msg.numents; i++) {
↓ open down ↓ |
578 lines elided |
↑ open up ↑ |
7277 7277 rnum = event_list[i].rnum;
7278 7278 event_list[i].revent = 0;
7279 7279 /* get the segment structure */
7280 7280 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7281 7281 if (seg) {
7282 7282 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7283 7283 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7284 7284 seg));
7285 7285 if (seg->s_pollevent) {
7286 7286 /* consume the event */
7287 - atomic_add_32(&seg->s_pollevent, -1);
7287 + atomic_dec_32(&seg->s_pollevent);
7288 7288 event_list[i].revent = POLLRDNORM;
7289 7289 }
7290 7290 rsmseglock_release(seg);
7291 7291 }
7292 7292 }
7293 7293
7294 7294 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7295 7295 RSM_SUCCESS) {
7296 7296 return (rc);
7297 7297 }
7298 7298
7299 7299 return (RSM_SUCCESS);
7300 7300 }
7301 7301
7302 7302 static int
7303 7303 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7304 7304 {
7305 7305 int size;
7306 7306 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7307 7307
7308 7308 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7309 7309
7310 7310 #ifdef _MULTI_DATAMODEL
7311 7311 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7312 7312 rsmka_iovec32_t *iovec32, *iovec32_base;
7313 7313 int i;
7314 7314
7315 7315 size = count * sizeof (rsmka_iovec32_t);
7316 7316 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7317 7317 if (ddi_copyin((caddr_t)user_vec,
7318 7318 (caddr_t)iovec32, size, mode)) {
7319 7319 kmem_free(iovec32, size);
7320 7320 DBG_PRINTF((category, RSM_DEBUG,
7321 7321 "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7322 7322 return (RSMERR_BAD_ADDR);
7323 7323 }
7324 7324
7325 7325 for (i = 0; i < count; i++, iovec++, iovec32++) {
7326 7326 iovec->io_type = (int)iovec32->io_type;
7327 7327 if (iovec->io_type == RSM_HANDLE_TYPE)
7328 7328 iovec->local.segid = (rsm_memseg_id_t)
7329 7329 iovec32->local;
7330 7330 else
7331 7331 iovec->local.vaddr =
7332 7332 (caddr_t)(uintptr_t)iovec32->local;
7333 7333 iovec->local_offset = (size_t)iovec32->local_offset;
7334 7334 iovec->remote_offset = (size_t)iovec32->remote_offset;
7335 7335 iovec->transfer_len = (size_t)iovec32->transfer_len;
7336 7336
7337 7337 }
7338 7338 kmem_free(iovec32_base, size);
7339 7339 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7340 7340 "iovec_copyin done\n"));
7341 7341 return (DDI_SUCCESS);
7342 7342 }
7343 7343 #endif
7344 7344
7345 7345 size = count * sizeof (rsmka_iovec_t);
7346 7346 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7347 7347 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7348 7348 "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7349 7349 return (RSMERR_BAD_ADDR);
7350 7350 }
7351 7351
7352 7352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7353 7353
7354 7354 return (DDI_SUCCESS);
7355 7355 }
7356 7356
7357 7357
7358 7358 static int
7359 7359 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7360 7360 {
7361 7361 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7362 7362
7363 7363 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7364 7364
7365 7365 #ifdef _MULTI_DATAMODEL
7366 7366 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7367 7367 rsmka_scat_gath32_t sg_io32;
7368 7368
7369 7369 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7370 7370 mode)) {
7371 7371 DBG_PRINTF((category, RSM_DEBUG,
7372 7372 "sgio_copyin done: returning EFAULT\n"));
7373 7373 return (RSMERR_BAD_ADDR);
7374 7374 }
7375 7375 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7376 7376 sg_io->io_request_count = (size_t)sg_io32.io_request_count;
7377 7377 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7378 7378 sg_io->flags = (size_t)sg_io32.flags;
7379 7379 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7380 7380 (uintptr_t)sg_io32.remote_handle;
7381 7381 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7382 7382 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7383 7383 "sgio_copyin done\n"));
7384 7384 return (DDI_SUCCESS);
7385 7385 }
7386 7386 #endif
7387 7387 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7388 7388 mode)) {
7389 7389 DBG_PRINTF((category, RSM_DEBUG,
7390 7390 "sgio_copyin done: returning EFAULT\n"));
7391 7391 return (RSMERR_BAD_ADDR);
7392 7392 }
7393 7393 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7394 7394 return (DDI_SUCCESS);
7395 7395 }
7396 7396
7397 7397 static int
7398 7398 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7399 7399 {
7400 7400 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7401 7401
7402 7402 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7403 7403 "sgio_resid_copyout enter\n"));
7404 7404
7405 7405 #ifdef _MULTI_DATAMODEL
7406 7406 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7407 7407 rsmka_scat_gath32_t sg_io32;
7408 7408
7409 7409 sg_io32.io_residual_count = sg_io->io_residual_count;
7410 7410 sg_io32.flags = sg_io->flags;
7411 7411
7412 7412 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7413 7413 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7414 7414 sizeof (uint32_t), mode)) {
7415 7415
7416 7416 DBG_PRINTF((category, RSM_ERR,
7417 7417 "sgio_resid_copyout error: rescnt\n"));
7418 7418 return (RSMERR_BAD_ADDR);
7419 7419 }
7420 7420
7421 7421 if (ddi_copyout((caddr_t)&sg_io32.flags,
7422 7422 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7423 7423 sizeof (uint32_t), mode)) {
7424 7424
7425 7425 DBG_PRINTF((category, RSM_ERR,
7426 7426 "sgio_resid_copyout error: flags\n"));
7427 7427 return (RSMERR_BAD_ADDR);
7428 7428 }
7429 7429 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7430 7430 "sgio_resid_copyout done\n"));
7431 7431 return (DDI_SUCCESS);
7432 7432 }
7433 7433 #endif
7434 7434 if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7435 7435 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7436 7436 sizeof (ulong_t), mode)) {
7437 7437
7438 7438 DBG_PRINTF((category, RSM_ERR,
7439 7439 "sgio_resid_copyout error:rescnt\n"));
7440 7440 return (RSMERR_BAD_ADDR);
7441 7441 }
7442 7442
7443 7443 if (ddi_copyout((caddr_t)&sg_io->flags,
7444 7444 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7445 7445 sizeof (uint_t), mode)) {
7446 7446
7447 7447 DBG_PRINTF((category, RSM_ERR,
7448 7448 "sgio_resid_copyout error:flags\n"));
7449 7449 return (RSMERR_BAD_ADDR);
7450 7450 }
7451 7451
7452 7452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7453 7453 return (DDI_SUCCESS);
7454 7454 }
7455 7455
7456 7456
7457 7457 static int
7458 7458 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7459 7459 {
7460 7460 rsmka_scat_gath_t sg_io;
7461 7461 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN];
7462 7462 rsmka_iovec_t *ka_iovec;
7463 7463 rsmka_iovec_t *ka_iovec_start;
7464 7464 rsmpi_scat_gath_t rsmpi_sg_io;
7465 7465 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN];
7466 7466 rsmpi_iovec_t *iovec;
7467 7467 rsmpi_iovec_t *iovec_start = NULL;
7468 7468 rsmapi_access_entry_t *acl;
7469 7469 rsmresource_t *res;
7470 7470 minor_t rnum;
7471 7471 rsmseg_t *im_seg, *ex_seg;
7472 7472 int e;
7473 7473 int error = 0;
7474 7474 uint_t i;
7475 7475 uint_t iov_proc = 0; /* num of iovecs processed */
7476 7476 size_t size = 0;
7477 7477 size_t ka_size;
7478 7478
7479 7479 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7480 7480
7481 7481 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7482 7482
7483 7483 credp = credp;
7484 7484
7485 7485 /*
7486 7486 * Copyin the scatter/gather structure and build new structure
7487 7487 * for rsmpi.
7488 7488 */
7489 7489 e = sgio_copyin(arg, &sg_io, mode);
7490 7490 if (e != DDI_SUCCESS) {
7491 7491 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7492 7492 "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7493 7493 return (e);
7494 7494 }
7495 7495
7496 7496 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7497 7497 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7498 7498 "rsm_iovec_ioctl done: request_count(%d) too large\n",
7499 7499 sg_io.io_request_count));
7500 7500 return (RSMERR_BAD_SGIO);
7501 7501 }
7502 7502
7503 7503 rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7504 7504 rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7505 7505 rsmpi_sg_io.io_segflg = 0;
7506 7506
7507 7507 /* Allocate memory and copyin io vector array */
7508 7508 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7509 7509 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t);
7510 7510 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7511 7511 } else {
7512 7512 ka_iovec_start = ka_iovec = ka_iovec_arr;
7513 7513 }
7514 7514 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7515 7515 sg_io.io_request_count, mode);
7516 7516 if (e != DDI_SUCCESS) {
7517 7517 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7518 7518 kmem_free(ka_iovec, ka_size);
7519 7519 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7520 7520 "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7521 7521 return (e);
7522 7522 }
7523 7523
7524 7524 /* get the import segment descriptor */
7525 7525 rnum = getminor(dev);
7526 7526 res = rsmresource_lookup(rnum, RSM_LOCK);
7527 7527
7528 7528 /*
7529 7529 * The following sequence of locking may (or MAY NOT) cause a
7530 7530 * deadlock but this is currently not addressed here since the
7531 7531 * implementation will be changed to incorporate the use of
7532 7532 * reference counting for both the import and the export segments.
7533 7533 */
7534 7534
7535 7535 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7536 7536
7537 7537 im_seg = (rsmseg_t *)res;
7538 7538
7539 7539 if (im_seg == NULL) {
7540 7540 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7541 7541 kmem_free(ka_iovec, ka_size);
7542 7542 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7543 7543 "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7544 7544 return (EINVAL);
7545 7545 }
7546 7546 /* putv/getv supported is supported only on import segments */
7547 7547 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7548 7548 rsmseglock_release(im_seg);
7549 7549 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7550 7550 kmem_free(ka_iovec, ka_size);
7551 7551 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7552 7552 "rsm_iovec_ioctl done: not an import segment\n"));
7553 7553 return (EINVAL);
7554 7554 }
7555 7555
7556 7556 /*
7557 7557 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7558 7558 * as well as wait for a local DR to complete.
7559 7559 */
7560 7560 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7561 7561 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7562 7562 (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7563 7563 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7564 7564 DBG_PRINTF((category, RSM_DEBUG,
7565 7565 "rsm_iovec_ioctl done: cv_wait INTR"));
7566 7566 rsmseglock_release(im_seg);
7567 7567 return (RSMERR_INTERRUPTED);
7568 7568 }
7569 7569 }
7570 7570
7571 7571 if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7572 7572 (im_seg->s_state != RSM_STATE_ACTIVE)) {
7573 7573
7574 7574 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7575 7575 im_seg->s_state == RSM_STATE_NEW);
7576 7576
7577 7577 DBG_PRINTF((category, RSM_DEBUG,
7578 7578 "rsm_iovec_ioctl done: im_seg not conn/map"));
7579 7579 rsmseglock_release(im_seg);
7580 7580 e = RSMERR_BAD_SGIO;
7581 7581 goto out;
7582 7582 }
7583 7583
7584 7584 im_seg->s_rdmacnt++;
7585 7585 rsmseglock_release(im_seg);
7586 7586
7587 7587 /*
7588 7588 * Allocate and set up the io vector for rsmpi
7589 7589 */
7590 7590 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7591 7591 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7592 7592 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7593 7593 } else {
7594 7594 iovec_start = iovec = iovec_arr;
7595 7595 }
7596 7596
7597 7597 rsmpi_sg_io.iovec = iovec;
7598 7598 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7599 7599 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7600 7600 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7601 7601
7602 7602 if (ex_seg == NULL) {
7603 7603 e = RSMERR_BAD_SGIO;
7604 7604 break;
7605 7605 }
7606 7606 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7607 7607
7608 7608 acl = ex_seg->s_acl;
7609 7609 if (acl[0].ae_permission == 0) {
7610 7610 struct buf *xbuf;
7611 7611 dev_t sdev = 0;
7612 7612
7613 7613 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7614 7614 0, ex_seg->s_len, B_WRITE,
7615 7615 sdev, 0, NULL, DDI_UMEM_SLEEP);
7616 7616
7617 7617 ASSERT(xbuf != NULL);
7618 7618
7619 7619 iovec->local_mem.ms_type = RSM_MEM_BUF;
7620 7620 iovec->local_mem.ms_memory.bp = xbuf;
7621 7621 } else {
7622 7622 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7623 7623 iovec->local_mem.ms_memory.handle =
7624 7624 ex_seg->s_handle.out;
7625 7625 }
7626 7626 ex_seg->s_rdmacnt++; /* refcnt the handle */
7627 7627 rsmseglock_release(ex_seg);
7628 7628 } else {
7629 7629 iovec->local_mem.ms_type = RSM_MEM_VADDR;
7630 7630 iovec->local_mem.ms_memory.vr.vaddr =
7631 7631 ka_iovec->local.vaddr;
7632 7632 }
7633 7633
7634 7634 iovec->local_offset = ka_iovec->local_offset;
7635 7635 iovec->remote_handle = im_seg->s_handle.in;
7636 7636 iovec->remote_offset = ka_iovec->remote_offset;
7637 7637 iovec->transfer_length = ka_iovec->transfer_len;
7638 7638 iovec++;
7639 7639 ka_iovec++;
7640 7640 }
7641 7641
7642 7642 if (iov_proc < sg_io.io_request_count) {
7643 7643 /* error while processing handle */
7644 7644 rsmseglock_acquire(im_seg);
7645 7645 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */
7646 7646 if (im_seg->s_rdmacnt == 0) {
7647 7647 cv_broadcast(&im_seg->s_cv);
7648 7648 }
7649 7649 rsmseglock_release(im_seg);
7650 7650 goto out;
7651 7651 }
7652 7652
7653 7653 /* call rsmpi */
7654 7654 if (cmd == RSM_IOCTL_PUTV)
7655 7655 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7656 7656 im_seg->s_adapter->rsmpi_handle,
7657 7657 &rsmpi_sg_io);
7658 7658 else if (cmd == RSM_IOCTL_GETV)
7659 7659 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7660 7660 im_seg->s_adapter->rsmpi_handle,
7661 7661 &rsmpi_sg_io);
7662 7662 else {
7663 7663 e = EINVAL;
7664 7664 DBG_PRINTF((category, RSM_DEBUG,
7665 7665 "iovec_ioctl: bad command = %x\n", cmd));
7666 7666 }
7667 7667
7668 7668
7669 7669 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7670 7670 "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7671 7671
7672 7672 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7673 7673
7674 7674 /*
7675 7675 * Check for implicit signal post flag and do the signal
7676 7676 * post if needed
7677 7677 */
7678 7678 if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7679 7679 e == RSM_SUCCESS) {
7680 7680 rsmipc_request_t request;
7681 7681
7682 7682 request.rsmipc_key = im_seg->s_segid;
7683 7683 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7684 7684 request.rsmipc_segment_cookie = NULL;
7685 7685 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7686 7686 /*
7687 7687 * Reset the implicit signal post flag to 0 to indicate
7688 7688 * that the signal post has been done and need not be
7689 7689 * done in the RSMAPI library
7690 7690 */
7691 7691 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7692 7692 }
7693 7693
7694 7694 rsmseglock_acquire(im_seg);
7695 7695 im_seg->s_rdmacnt--;
7696 7696 if (im_seg->s_rdmacnt == 0) {
7697 7697 cv_broadcast(&im_seg->s_cv);
7698 7698 }
7699 7699 rsmseglock_release(im_seg);
7700 7700 error = sgio_resid_copyout(arg, &sg_io, mode);
7701 7701 out:
7702 7702 iovec = iovec_start;
7703 7703 ka_iovec = ka_iovec_start;
7704 7704 for (i = 0; i < iov_proc; i++) {
7705 7705 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7706 7706 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7707 7707
7708 7708 ASSERT(ex_seg != NULL);
7709 7709 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7710 7710
7711 7711 ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7712 7712 if (ex_seg->s_rdmacnt == 0) {
7713 7713 cv_broadcast(&ex_seg->s_cv);
7714 7714 }
7715 7715 rsmseglock_release(ex_seg);
7716 7716 }
7717 7717
7718 7718 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7719 7719
7720 7720 /*
7721 7721 * At present there is no dependency on the existence of xbufs
7722 7722 * created by ddi_umem_iosetup for each of the iovecs. So we
7723 7723 * can these xbufs here.
7724 7724 */
7725 7725 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7726 7726 freerbuf(iovec->local_mem.ms_memory.bp);
7727 7727 }
7728 7728
7729 7729 iovec++;
7730 7730 ka_iovec++;
7731 7731 }
7732 7732
7733 7733 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7734 7734 if (iovec_start)
7735 7735 kmem_free(iovec_start, size);
7736 7736 kmem_free(ka_iovec_start, ka_size);
7737 7737 }
7738 7738
7739 7739 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7740 7740 "rsm_iovec_ioctl done %d\n", e));
7741 7741 /* if RSMPI call fails return that else return copyout's retval */
7742 7742 return ((e != RSM_SUCCESS) ? e : error);
7743 7743
7744 7744 }
7745 7745
7746 7746
7747 7747 static int
7748 7748 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7749 7749 {
7750 7750 adapter_t *adapter;
7751 7751 rsm_addr_t addr;
7752 7752 rsm_node_id_t node;
7753 7753 int rval = DDI_SUCCESS;
7754 7754 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7755 7755
7756 7756 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7757 7757
7758 7758 adapter = rsm_getadapter(msg, mode);
7759 7759 if (adapter == NULL) {
7760 7760 DBG_PRINTF((category, RSM_DEBUG,
7761 7761 "rsmaddr_ioctl done: adapter not found\n"));
7762 7762 return (RSMERR_CTLR_NOT_PRESENT);
7763 7763 }
7764 7764
7765 7765 switch (cmd) {
7766 7766 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7767 7767 /* returns the hwaddr in msg->hwaddr */
7768 7768 if (msg->nodeid == my_nodeid) {
7769 7769 msg->hwaddr = adapter->hwaddr;
7770 7770 } else {
7771 7771 addr = get_remote_hwaddr(adapter, msg->nodeid);
7772 7772 if ((int64_t)addr < 0) {
7773 7773 rval = RSMERR_INTERNAL_ERROR;
7774 7774 } else {
7775 7775 msg->hwaddr = addr;
7776 7776 }
7777 7777 }
7778 7778 break;
7779 7779 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7780 7780 /* returns the nodeid in msg->nodeid */
7781 7781 if (msg->hwaddr == adapter->hwaddr) {
7782 7782 msg->nodeid = my_nodeid;
7783 7783 } else {
7784 7784 node = get_remote_nodeid(adapter, msg->hwaddr);
7785 7785 if ((int)node < 0) {
7786 7786 rval = RSMERR_INTERNAL_ERROR;
7787 7787 } else {
7788 7788 msg->nodeid = (rsm_node_id_t)node;
7789 7789 }
7790 7790 }
7791 7791 break;
7792 7792 default:
7793 7793 rval = EINVAL;
7794 7794 break;
7795 7795 }
7796 7796
7797 7797 rsmka_release_adapter(adapter);
7798 7798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7799 7799 "rsmaddr_ioctl done: %d\n", rval));
7800 7800 return (rval);
7801 7801 }
7802 7802
7803 7803 static int
7804 7804 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7805 7805 {
7806 7806 DBG_DEFINE(category,
7807 7807 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7808 7808
7809 7809 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7810 7810
7811 7811 #ifdef _MULTI_DATAMODEL
7812 7812
7813 7813 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7814 7814 rsm_ioctlmsg32_t msg32;
7815 7815 int i;
7816 7816
7817 7817 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7818 7818 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7819 7819 "rsm_ddi_copyin done: EFAULT\n"));
7820 7820 return (RSMERR_BAD_ADDR);
7821 7821 }
7822 7822 msg->len = msg32.len;
7823 7823 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7824 7824 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7825 7825 msg->key = msg32.key;
7826 7826 msg->acl_len = msg32.acl_len;
7827 7827 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7828 7828 msg->cnum = msg32.cnum;
7829 7829 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7830 7830 msg->cname_len = msg32.cname_len;
7831 7831 msg->nodeid = msg32.nodeid;
7832 7832 msg->hwaddr = msg32.hwaddr;
7833 7833 msg->perm = msg32.perm;
7834 7834 for (i = 0; i < 4; i++) {
7835 7835 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7836 7836 }
7837 7837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7838 7838 "rsm_ddi_copyin done\n"));
7839 7839 return (RSM_SUCCESS);
7840 7840 }
7841 7841 #endif
7842 7842 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7843 7843 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7844 7844 return (RSMERR_BAD_ADDR);
7845 7845 else
7846 7846 return (RSM_SUCCESS);
7847 7847 }
7848 7848
7849 7849 static int
7850 7850 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7851 7851 {
7852 7852 rsmka_int_controller_attr_t rsm_cattr;
7853 7853 DBG_DEFINE(category,
7854 7854 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7855 7855
7856 7856 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7857 7857 "rsmattr_ddi_copyout enter\n"));
7858 7858 /*
7859 7859 * need to copy appropriate data from rsm_controller_attr_t
7860 7860 * to rsmka_int_controller_attr_t
7861 7861 */
7862 7862 #ifdef _MULTI_DATAMODEL
7863 7863 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7864 7864 rsmka_int_controller_attr32_t rsm_cattr32;
7865 7865
7866 7866 rsm_cattr32.attr_direct_access_sizes =
7867 7867 adapter->rsm_attr.attr_direct_access_sizes;
7868 7868 rsm_cattr32.attr_atomic_sizes =
7869 7869 adapter->rsm_attr.attr_atomic_sizes;
7870 7870 rsm_cattr32.attr_page_size =
7871 7871 adapter->rsm_attr.attr_page_size;
7872 7872 if (adapter->rsm_attr.attr_max_export_segment_size >
7873 7873 UINT_MAX)
7874 7874 rsm_cattr32.attr_max_export_segment_size =
7875 7875 RSM_MAXSZ_PAGE_ALIGNED;
7876 7876 else
7877 7877 rsm_cattr32.attr_max_export_segment_size =
7878 7878 adapter->rsm_attr.attr_max_export_segment_size;
7879 7879 if (adapter->rsm_attr.attr_tot_export_segment_size >
7880 7880 UINT_MAX)
7881 7881 rsm_cattr32.attr_tot_export_segment_size =
7882 7882 RSM_MAXSZ_PAGE_ALIGNED;
7883 7883 else
7884 7884 rsm_cattr32.attr_tot_export_segment_size =
7885 7885 adapter->rsm_attr.attr_tot_export_segment_size;
7886 7886 if (adapter->rsm_attr.attr_max_export_segments >
7887 7887 UINT_MAX)
7888 7888 rsm_cattr32.attr_max_export_segments =
7889 7889 UINT_MAX;
7890 7890 else
7891 7891 rsm_cattr32.attr_max_export_segments =
7892 7892 adapter->rsm_attr.attr_max_export_segments;
7893 7893 if (adapter->rsm_attr.attr_max_import_map_size >
7894 7894 UINT_MAX)
7895 7895 rsm_cattr32.attr_max_import_map_size =
7896 7896 RSM_MAXSZ_PAGE_ALIGNED;
7897 7897 else
7898 7898 rsm_cattr32.attr_max_import_map_size =
7899 7899 adapter->rsm_attr.attr_max_import_map_size;
7900 7900 if (adapter->rsm_attr.attr_tot_import_map_size >
7901 7901 UINT_MAX)
7902 7902 rsm_cattr32.attr_tot_import_map_size =
7903 7903 RSM_MAXSZ_PAGE_ALIGNED;
7904 7904 else
7905 7905 rsm_cattr32.attr_tot_import_map_size =
7906 7906 adapter->rsm_attr.attr_tot_import_map_size;
7907 7907 if (adapter->rsm_attr.attr_max_import_segments >
7908 7908 UINT_MAX)
7909 7909 rsm_cattr32.attr_max_import_segments =
7910 7910 UINT_MAX;
7911 7911 else
7912 7912 rsm_cattr32.attr_max_import_segments =
7913 7913 adapter->rsm_attr.attr_max_import_segments;
7914 7914 rsm_cattr32.attr_controller_addr =
7915 7915 adapter->rsm_attr.attr_controller_addr;
7916 7916
7917 7917 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7918 7918 "rsmattr_ddi_copyout done\n"));
7919 7919 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7920 7920 sizeof (rsmka_int_controller_attr32_t), mode)) {
7921 7921 return (RSMERR_BAD_ADDR);
7922 7922 }
7923 7923 else
7924 7924 return (RSM_SUCCESS);
7925 7925 }
7926 7926 #endif
7927 7927 rsm_cattr.attr_direct_access_sizes =
7928 7928 adapter->rsm_attr.attr_direct_access_sizes;
7929 7929 rsm_cattr.attr_atomic_sizes =
7930 7930 adapter->rsm_attr.attr_atomic_sizes;
7931 7931 rsm_cattr.attr_page_size =
7932 7932 adapter->rsm_attr.attr_page_size;
7933 7933 rsm_cattr.attr_max_export_segment_size =
7934 7934 adapter->rsm_attr.attr_max_export_segment_size;
7935 7935 rsm_cattr.attr_tot_export_segment_size =
7936 7936 adapter->rsm_attr.attr_tot_export_segment_size;
7937 7937 rsm_cattr.attr_max_export_segments =
7938 7938 adapter->rsm_attr.attr_max_export_segments;
7939 7939 rsm_cattr.attr_max_import_map_size =
7940 7940 adapter->rsm_attr.attr_max_import_map_size;
7941 7941 rsm_cattr.attr_tot_import_map_size =
7942 7942 adapter->rsm_attr.attr_tot_import_map_size;
7943 7943 rsm_cattr.attr_max_import_segments =
7944 7944 adapter->rsm_attr.attr_max_import_segments;
7945 7945 rsm_cattr.attr_controller_addr =
7946 7946 adapter->rsm_attr.attr_controller_addr;
7947 7947 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7948 7948 "rsmattr_ddi_copyout done\n"));
7949 7949 if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7950 7950 sizeof (rsmka_int_controller_attr_t), mode)) {
7951 7951 return (RSMERR_BAD_ADDR);
7952 7952 }
7953 7953 else
7954 7954 return (RSM_SUCCESS);
7955 7955 }
7956 7956
7957 7957 /*ARGSUSED*/
7958 7958 static int
7959 7959 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7960 7960 int *rvalp)
7961 7961 {
7962 7962 rsmseg_t *seg;
7963 7963 rsmresource_t *res;
7964 7964 minor_t rnum;
7965 7965 rsm_ioctlmsg_t msg = {0};
7966 7966 int error;
7967 7967 adapter_t *adapter;
7968 7968 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7969 7969
7970 7970 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7971 7971
7972 7972 if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7973 7973 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7974 7974 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7975 7975 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7976 7976 return (error);
7977 7977 }
7978 7978
7979 7979 /* topology cmd does not use the arg common to other cmds */
7980 7980 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7981 7981 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7982 7982 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7983 7983 "rsm_ioctl done: %d\n", error));
7984 7984 return (error);
7985 7985 }
7986 7986
7987 7987 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7988 7988 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7989 7989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7990 7990 "rsm_ioctl done: %d\n", error));
7991 7991 return (error);
7992 7992 }
7993 7993
7994 7994 /*
7995 7995 * try to load arguments
7996 7996 */
7997 7997 if (cmd != RSM_IOCTL_RING_BELL &&
7998 7998 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7999 7999 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8000 8000 "rsm_ioctl done: EFAULT\n"));
8001 8001 return (RSMERR_BAD_ADDR);
8002 8002 }
8003 8003
8004 8004 if (cmd == RSM_IOCTL_ATTR) {
8005 8005 adapter = rsm_getadapter(&msg, mode);
8006 8006 if (adapter == NULL) {
8007 8007 DBG_PRINTF((category, RSM_DEBUG,
8008 8008 "rsm_ioctl done: ENODEV\n"));
8009 8009 return (RSMERR_CTLR_NOT_PRESENT);
8010 8010 }
8011 8011 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8012 8012 rsmka_release_adapter(adapter);
8013 8013 DBG_PRINTF((category, RSM_DEBUG,
8014 8014 "rsm_ioctl:after copyout %d\n", error));
8015 8015 return (error);
8016 8016 }
8017 8017
8018 8018 if (cmd == RSM_IOCTL_BAR_INFO) {
8019 8019 /* Return library off,len of barrier page */
8020 8020 msg.off = barrier_offset;
8021 8021 msg.len = (int)barrier_size;
8022 8022 #ifdef _MULTI_DATAMODEL
8023 8023 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8024 8024 rsm_ioctlmsg32_t msg32;
8025 8025
8026 8026 if (msg.len > UINT_MAX)
8027 8027 msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8028 8028 else
8029 8029 msg32.len = (int32_t)msg.len;
8030 8030 msg32.off = (int32_t)msg.off;
8031 8031 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8032 8032 "rsm_ioctl done\n"));
8033 8033 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8034 8034 sizeof (msg32), mode))
8035 8035 return (RSMERR_BAD_ADDR);
8036 8036 else
8037 8037 return (RSM_SUCCESS);
8038 8038 }
8039 8039 #endif
8040 8040 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8041 8041 "rsm_ioctl done\n"));
8042 8042 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8043 8043 sizeof (msg), mode))
8044 8044 return (RSMERR_BAD_ADDR);
8045 8045 else
8046 8046 return (RSM_SUCCESS);
8047 8047 }
8048 8048
8049 8049 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8050 8050 /* map the nodeid or hwaddr */
8051 8051 error = rsmaddr_ioctl(cmd, &msg, mode);
8052 8052 if (error == RSM_SUCCESS) {
8053 8053 #ifdef _MULTI_DATAMODEL
8054 8054 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8055 8055 rsm_ioctlmsg32_t msg32;
8056 8056
8057 8057 msg32.hwaddr = (uint64_t)msg.hwaddr;
8058 8058 msg32.nodeid = (uint32_t)msg.nodeid;
8059 8059
8060 8060 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8061 8061 "rsm_ioctl done\n"));
8062 8062 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8063 8063 sizeof (msg32), mode))
8064 8064 return (RSMERR_BAD_ADDR);
8065 8065 else
8066 8066 return (RSM_SUCCESS);
8067 8067 }
8068 8068 #endif
8069 8069 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8070 8070 "rsm_ioctl done\n"));
8071 8071 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8072 8072 sizeof (msg), mode))
8073 8073 return (RSMERR_BAD_ADDR);
8074 8074 else
8075 8075 return (RSM_SUCCESS);
8076 8076 }
8077 8077 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8078 8078 "rsm_ioctl done: %d\n", error));
8079 8079 return (error);
8080 8080 }
8081 8081
8082 8082 /* Find resource and look it in read mode */
8083 8083 rnum = getminor(dev);
8084 8084 res = rsmresource_lookup(rnum, RSM_NOLOCK);
8085 8085 ASSERT(res != NULL);
8086 8086
8087 8087 /*
8088 8088 * Find command group
8089 8089 */
8090 8090 switch (RSM_IOCTL_CMDGRP(cmd)) {
8091 8091 case RSM_IOCTL_EXPORT_SEG:
8092 8092 /*
8093 8093 * Export list is searched during publish, loopback and
8094 8094 * remote lookup call.
8095 8095 */
8096 8096 seg = rsmresource_seg(res, rnum, credp,
8097 8097 RSM_RESOURCE_EXPORT_SEGMENT);
8098 8098 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8099 8099 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8100 8100 credp);
8101 8101 } else { /* export ioctl on an import/barrier resource */
8102 8102 error = RSMERR_BAD_SEG_HNDL;
8103 8103 }
8104 8104 break;
8105 8105 case RSM_IOCTL_IMPORT_SEG:
8106 8106 /* Import list is searched during remote unmap call. */
8107 8107 seg = rsmresource_seg(res, rnum, credp,
8108 8108 RSM_RESOURCE_IMPORT_SEGMENT);
8109 8109 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8110 8110 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8111 8111 credp);
8112 8112 } else { /* import ioctl on an export/barrier resource */
8113 8113 error = RSMERR_BAD_SEG_HNDL;
8114 8114 }
8115 8115 break;
8116 8116 case RSM_IOCTL_BAR:
8117 8117 if (res != RSMRC_RESERVED &&
8118 8118 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8119 8119 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8120 8120 mode);
8121 8121 } else { /* invalid res value */
8122 8122 error = RSMERR_BAD_SEG_HNDL;
8123 8123 }
8124 8124 break;
8125 8125 case RSM_IOCTL_BELL:
8126 8126 if (res != RSMRC_RESERVED) {
8127 8127 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8128 8128 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8129 8129 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8130 8130 error = importbell_ioctl((rsmseg_t *)res, cmd);
8131 8131 else /* RSM_RESOURCE_BAR */
8132 8132 error = RSMERR_BAD_SEG_HNDL;
8133 8133 } else { /* invalid res value */
8134 8134 error = RSMERR_BAD_SEG_HNDL;
8135 8135 }
8136 8136 break;
8137 8137 default:
8138 8138 error = EINVAL;
8139 8139 }
8140 8140
8141 8141 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8142 8142 error));
8143 8143 return (error);
8144 8144 }
8145 8145
8146 8146
8147 8147 /* **************************** Segment Mapping Operations ********* */
8148 8148 static rsm_mapinfo_t *
8149 8149 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8150 8150 size_t *map_len)
8151 8151 {
8152 8152 rsm_mapinfo_t *p;
8153 8153 /*
8154 8154 * Find the correct mapinfo structure to use during the mapping
8155 8155 * from the seg->s_mapinfo list.
8156 8156 * The seg->s_mapinfo list contains in reverse order the mappings
8157 8157 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8158 8158 * access the correct entry within this list for the mapping
8159 8159 * requested.
8160 8160 *
8161 8161 * The algorithm for selecting a list entry is as follows:
8162 8162 *
8163 8163 * When start_offset of an entry <= off we have found the entry
8164 8164 * we were looking for. Adjust the dev_offset and map_len (needs
8165 8165 * to be PAGESIZE aligned).
8166 8166 */
8167 8167 p = seg->s_mapinfo;
8168 8168 for (; p; p = p->next) {
8169 8169 if (p->start_offset <= off) {
8170 8170 *dev_offset = p->dev_offset + off - p->start_offset;
8171 8171 *map_len = (len > p->individual_len) ?
8172 8172 p->individual_len : ptob(btopr(len));
8173 8173 return (p);
8174 8174 }
8175 8175 p = p->next;
8176 8176 }
8177 8177
8178 8178 return (NULL);
8179 8179 }
8180 8180
8181 8181 static void
8182 8182 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
8183 8183 {
8184 8184 rsm_mapinfo_t *p;
8185 8185
8186 8186 while (mapinfo != NULL) {
8187 8187 p = mapinfo;
8188 8188 mapinfo = mapinfo->next;
8189 8189 kmem_free(p, sizeof (*p));
8190 8190 }
8191 8191 }
8192 8192
8193 8193 static int
8194 8194 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8195 8195 size_t len, void **pvtp)
8196 8196 {
8197 8197 rsmcookie_t *p;
8198 8198 rsmresource_t *res;
8199 8199 rsmseg_t *seg;
8200 8200 minor_t rnum;
8201 8201 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8202 8202
8203 8203 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8204 8204
8205 8205 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8206 8206 "rsmmap_map: dhp = %x\n", dhp));
8207 8207
8208 8208 flags = flags;
8209 8209
8210 8210 rnum = getminor(dev);
8211 8211 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8212 8212 ASSERT(res != NULL);
8213 8213
8214 8214 seg = (rsmseg_t *)res;
8215 8215
8216 8216 rsmseglock_acquire(seg);
8217 8217
8218 8218 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8219 8219
8220 8220 /*
8221 8221 * Allocate structure and add cookie to segment list
8222 8222 */
8223 8223 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8224 8224
8225 8225 p->c_dhp = dhp;
8226 8226 p->c_off = off;
8227 8227 p->c_len = len;
8228 8228 p->c_next = seg->s_ckl;
8229 8229 seg->s_ckl = p;
8230 8230
8231 8231 *pvtp = (void *)seg;
8232 8232
8233 8233 rsmseglock_release(seg);
8234 8234
8235 8235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8236 8236 return (DDI_SUCCESS);
8237 8237 }
8238 8238
8239 8239 /*
8240 8240 * Page fault handling is done here. The prerequisite mapping setup
8241 8241 * has been done in rsm_devmap with calls to ddi_devmem_setup or
8242 8242 * ddi_umem_setup
8243 8243 */
8244 8244 static int
8245 8245 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8246 8246 uint_t type, uint_t rw)
8247 8247 {
8248 8248 int e;
8249 8249 rsmseg_t *seg = (rsmseg_t *)pvt;
8250 8250 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8251 8251
8252 8252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8253 8253
8254 8254 rsmseglock_acquire(seg);
8255 8255
8256 8256 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8257 8257
8258 8258 while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8259 8259 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8260 8260 DBG_PRINTF((category, RSM_DEBUG,
8261 8261 "rsmmap_access done: cv_wait INTR"));
8262 8262 rsmseglock_release(seg);
8263 8263 return (RSMERR_INTERRUPTED);
8264 8264 }
8265 8265 }
8266 8266
8267 8267 ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8268 8268 seg->s_state == RSM_STATE_ACTIVE);
8269 8269
8270 8270 if (seg->s_state == RSM_STATE_DISCONNECT)
8271 8271 seg->s_flags |= RSM_IMPORT_DUMMY;
8272 8272
8273 8273 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8274 8274 "rsmmap_access: dhp = %x\n", dhp));
8275 8275
8276 8276 rsmseglock_release(seg);
8277 8277
8278 8278 if (e = devmap_load(dhp, offset, len, type, rw)) {
8279 8279 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8280 8280 }
8281 8281
8282 8282
8283 8283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8284 8284
8285 8285 return (e);
8286 8286 }
8287 8287
8288 8288 static int
8289 8289 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8290 8290 void **newpvt)
8291 8291 {
8292 8292 rsmseg_t *seg = (rsmseg_t *)oldpvt;
8293 8293 rsmcookie_t *p, *old;
8294 8294 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8295 8295
8296 8296 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8297 8297
8298 8298 /*
8299 8299 * Same as map, create an entry to hold cookie and add it to
8300 8300 * connect segment list. The oldpvt is a pointer to segment.
8301 8301 * Return segment pointer in newpvt.
8302 8302 */
8303 8303 rsmseglock_acquire(seg);
8304 8304
8305 8305 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8306 8306
8307 8307 /*
8308 8308 * Find old cookie
8309 8309 */
8310 8310 for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8311 8311 if (old->c_dhp == dhp) {
8312 8312 break;
8313 8313 }
8314 8314 }
8315 8315 if (old == NULL) {
8316 8316 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8317 8317 "rsmmap_dup done: EINVAL\n"));
8318 8318 rsmseglock_release(seg);
8319 8319 return (EINVAL);
8320 8320 }
8321 8321
8322 8322 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8323 8323
8324 8324 p->c_dhp = new_dhp;
8325 8325 p->c_off = old->c_off;
8326 8326 p->c_len = old->c_len;
8327 8327 p->c_next = seg->s_ckl;
8328 8328 seg->s_ckl = p;
8329 8329
8330 8330 *newpvt = (void *)seg;
8331 8331
8332 8332 rsmseglock_release(seg);
8333 8333
8334 8334 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8335 8335
8336 8336 return (DDI_SUCCESS);
8337 8337 }
8338 8338
8339 8339 static void
8340 8340 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8341 8341 devmap_cookie_t new_dhp1, void **pvtp1,
8342 8342 devmap_cookie_t new_dhp2, void **pvtp2)
8343 8343 {
8344 8344 /*
8345 8345 * Remove pvtp structure from segment list.
8346 8346 */
8347 8347 rsmseg_t *seg = (rsmseg_t *)pvtp;
8348 8348 int freeflag;
8349 8349
8350 8350 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8351 8351
8352 8352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8353 8353
8354 8354 off = off; len = len;
8355 8355 pvtp1 = pvtp1; pvtp2 = pvtp2;
8356 8356
8357 8357 rsmseglock_acquire(seg);
8358 8358
8359 8359 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8360 8360
8361 8361 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8362 8362 "rsmmap_unmap: dhp = %x\n", dhp));
8363 8363 /*
8364 8364 * We can go ahead and remove the dhps even if we are in
8365 8365 * the MAPPING state because the dhps being removed here
8366 8366 * belong to a different mmap and we are holding the segment
8367 8367 * lock.
8368 8368 */
8369 8369 if (new_dhp1 == NULL && new_dhp2 == NULL) {
8370 8370 /* find and remove dhp handle */
8371 8371 rsmcookie_t *tmp, **back = &seg->s_ckl;
8372 8372
8373 8373 while (*back != NULL) {
8374 8374 tmp = *back;
8375 8375 if (tmp->c_dhp == dhp) {
8376 8376 *back = tmp->c_next;
8377 8377 kmem_free(tmp, sizeof (*tmp));
8378 8378 break;
8379 8379 }
8380 8380 back = &tmp->c_next;
8381 8381 }
8382 8382 } else {
8383 8383 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8384 8384 "rsmmap_unmap:parital unmap"
8385 8385 "new_dhp1 %lx, new_dhp2 %lx\n",
8386 8386 (size_t)new_dhp1, (size_t)new_dhp2));
8387 8387 }
8388 8388
8389 8389 /*
8390 8390 * rsmmap_unmap is called for each mapping cookie on the list.
8391 8391 * When the list becomes empty and we are not in the MAPPING
8392 8392 * state then unmap in the rsmpi driver.
8393 8393 */
8394 8394 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8395 8395 (void) rsm_unmap(seg);
8396 8396
8397 8397 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8398 8398 freeflag = 1;
8399 8399 } else {
8400 8400 freeflag = 0;
8401 8401 }
8402 8402
8403 8403 rsmseglock_release(seg);
8404 8404
8405 8405 if (freeflag) {
8406 8406 /* Free the segment structure */
8407 8407 rsmseg_free(seg);
8408 8408 }
8409 8409 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8410 8410
8411 8411 }
8412 8412
8413 8413 static struct devmap_callback_ctl rsmmap_ops = {
8414 8414 DEVMAP_OPS_REV, /* devmap_ops version number */
8415 8415 rsmmap_map, /* devmap_ops map routine */
8416 8416 rsmmap_access, /* devmap_ops access routine */
8417 8417 rsmmap_dup, /* devmap_ops dup routine */
8418 8418 rsmmap_unmap, /* devmap_ops unmap routine */
8419 8419 };
8420 8420
8421 8421 static int
8422 8422 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8423 8423 size_t *maplen, uint_t model /*ARGSUSED*/)
8424 8424 {
8425 8425 struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8426 8426 int err;
8427 8427 uint_t maxprot;
8428 8428 minor_t rnum;
8429 8429 rsmseg_t *seg;
8430 8430 off_t dev_offset;
8431 8431 size_t cur_len;
8432 8432 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8433 8433
8434 8434 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8435 8435
8436 8436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8437 8437 "rsm_devmap: off = %lx, len = %lx\n", off, len));
8438 8438 rnum = getminor(dev);
8439 8439 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8440 8440 ASSERT(seg != NULL);
8441 8441
8442 8442 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8443 8443 if ((off == barrier_offset) &&
8444 8444 (len == barrier_size)) {
8445 8445
8446 8446 ASSERT(bar_va != NULL && bar_cookie != NULL);
8447 8447
8448 8448 /*
8449 8449 * The offset argument in devmap_umem_setup represents
8450 8450 * the offset within the kernel memory defined by the
8451 8451 * cookie. We use this offset as barrier_offset.
8452 8452 */
8453 8453 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8454 8454 barrier_offset, len, PROT_USER|PROT_READ,
8455 8455 DEVMAP_DEFAULTS, 0);
8456 8456
8457 8457 if (err != 0) {
8458 8458 DBG_PRINTF((category, RSM_ERR,
8459 8459 "rsm_devmap done: %d\n", err));
8460 8460 return (RSMERR_MAP_FAILED);
8461 8461 }
8462 8462 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8463 8463 "rsm_devmap done: %d\n", err));
8464 8464
8465 8465 *maplen = barrier_size;
8466 8466
8467 8467 return (err);
8468 8468 } else {
8469 8469 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8470 8470 "rsm_devmap done: %d\n", err));
8471 8471 return (RSMERR_MAP_FAILED);
8472 8472 }
8473 8473 }
8474 8474
8475 8475 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8476 8476 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8477 8477
8478 8478 /*
8479 8479 * Make sure we still have permission for the map operation.
8480 8480 */
8481 8481 maxprot = PROT_USER;
8482 8482 if (seg->s_mode & RSM_PERM_READ) {
8483 8483 maxprot |= PROT_READ;
8484 8484 }
8485 8485
8486 8486 if (seg->s_mode & RSM_PERM_WRITE) {
8487 8487 maxprot |= PROT_WRITE;
8488 8488 }
8489 8489
8490 8490 /*
8491 8491 * For each devmap call, rsmmap_map is called. This maintains driver
8492 8492 * private information for the mapping. Thus, if there are multiple
8493 8493 * devmap calls there will be multiple rsmmap_map calls and for each
8494 8494 * call, the mapping information will be stored.
8495 8495 * In case of an error during the processing of the devmap call, error
8496 8496 * will be returned. This error return causes the caller of rsm_devmap
8497 8497 * to undo all the mappings by calling rsmmap_unmap for each one.
8498 8498 * rsmmap_unmap will free up the private information for the requested
8499 8499 * mapping.
8500 8500 */
8501 8501 if (seg->s_node != my_nodeid) {
8502 8502 rsm_mapinfo_t *p;
8503 8503
8504 8504 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8505 8505 if (p == NULL) {
8506 8506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8507 8507 "rsm_devmap: incorrect mapping info\n"));
8508 8508 return (RSMERR_MAP_FAILED);
8509 8509 }
8510 8510 err = devmap_devmem_setup(dhc, p->dip,
8511 8511 callbackops, p->dev_register,
8512 8512 dev_offset, cur_len, maxprot,
8513 8513 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8514 8514
8515 8515 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8516 8516 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8517 8517 "off=%lx,len=%lx\n",
8518 8518 p->dip, p->dev_register, dev_offset, off, cur_len));
8519 8519
8520 8520 if (err != 0) {
8521 8521 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8522 8522 "rsm_devmap: devmap_devmem_setup failed %d\n",
8523 8523 err));
8524 8524 return (RSMERR_MAP_FAILED);
8525 8525 }
8526 8526 /* cur_len is always an integral multiple pagesize */
8527 8527 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8528 8528 *maplen = cur_len;
8529 8529 return (err);
8530 8530
8531 8531 } else {
8532 8532 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8533 8533 seg->s_cookie, off, len, maxprot,
8534 8534 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8535 8535 if (err != 0) {
8536 8536 DBG_PRINTF((category, RSM_DEBUG,
8537 8537 "rsm_devmap: devmap_umem_setup failed %d\n",
8538 8538 err));
8539 8539 return (RSMERR_MAP_FAILED);
8540 8540 }
8541 8541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8542 8542 "rsm_devmap: loopback done\n"));
8543 8543
8544 8544 *maplen = ptob(btopr(len));
8545 8545
8546 8546 return (err);
8547 8547 }
8548 8548 }
8549 8549
8550 8550 /*
8551 8551 * We can use the devmap framework for mapping device memory to user space by
8552 8552 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8553 8553 * processing calls this entry point and devmap_setup is called within this
8554 8554 * function, which eventually calls rsm_devmap
8555 8555 */
8556 8556 static int
8557 8557 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8558 8558 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8559 8559 {
8560 8560 int error = 0;
8561 8561 int old_state;
8562 8562 minor_t rnum;
8563 8563 rsmseg_t *seg, *eseg;
8564 8564 adapter_t *adapter;
8565 8565 rsm_import_share_t *sharedp;
8566 8566 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8567 8567
8568 8568 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8569 8569
8570 8570 /*
8571 8571 * find segment
8572 8572 */
8573 8573 rnum = getminor(dev);
8574 8574 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8575 8575
8576 8576 if (seg == NULL) {
8577 8577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8578 8578 "rsm_segmap done: invalid segment\n"));
8579 8579 return (EINVAL);
8580 8580 }
8581 8581
8582 8582 /*
8583 8583 * the user is trying to map a resource that has not been
8584 8584 * defined yet. The library uses this to map in the
8585 8585 * barrier page.
8586 8586 */
8587 8587 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8588 8588 rsmseglock_release(seg);
8589 8589
8590 8590 /*
8591 8591 * The mapping for the barrier page is identified
8592 8592 * by the special offset barrier_offset
8593 8593 */
8594 8594
8595 8595 if (off == (off_t)barrier_offset ||
8596 8596 len == (off_t)barrier_size) {
8597 8597 if (bar_cookie == NULL || bar_va == NULL) {
8598 8598 DBG_PRINTF((category, RSM_DEBUG,
8599 8599 "rsm_segmap: bar cookie/va is NULL\n"));
8600 8600 return (EINVAL);
8601 8601 }
8602 8602
8603 8603 error = devmap_setup(dev, (offset_t)off, as, addrp,
8604 8604 (size_t)len, prot, maxprot, flags, cred);
8605 8605
8606 8606 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8607 8607 "rsm_segmap done: %d\n", error));
8608 8608 return (error);
8609 8609 } else {
8610 8610 DBG_PRINTF((category, RSM_DEBUG,
8611 8611 "rsm_segmap: bad offset/length\n"));
8612 8612 return (EINVAL);
8613 8613 }
8614 8614 }
8615 8615
8616 8616 /* Make sure you can only map imported segments */
8617 8617 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8618 8618 rsmseglock_release(seg);
8619 8619 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8620 8620 "rsm_segmap done: not an import segment\n"));
8621 8621 return (EINVAL);
8622 8622 }
8623 8623 /* check means library is broken */
8624 8624 ASSERT(seg->s_hdr.rsmrc_num == rnum);
8625 8625
8626 8626 /* wait for the segment to become unquiesced */
8627 8627 while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8628 8628 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8629 8629 rsmseglock_release(seg);
8630 8630 DBG_PRINTF((category, RSM_DEBUG,
8631 8631 "rsm_segmap done: cv_wait INTR"));
8632 8632 return (ENODEV);
8633 8633 }
8634 8634 }
8635 8635
8636 8636 /* wait until segment leaves the mapping state */
8637 8637 while (seg->s_state == RSM_STATE_MAPPING)
8638 8638 cv_wait(&seg->s_cv, &seg->s_lock);
8639 8639
8640 8640 /*
8641 8641 * we allow multiple maps of the same segment in the KA
8642 8642 * and it works because we do an rsmpi map of the whole
8643 8643 * segment during the first map and all the device mapping
8644 8644 * information needed in rsm_devmap is in the mapinfo list.
8645 8645 */
8646 8646 if ((seg->s_state != RSM_STATE_CONNECT) &&
8647 8647 (seg->s_state != RSM_STATE_ACTIVE)) {
8648 8648 rsmseglock_release(seg);
8649 8649 DBG_PRINTF((category, RSM_DEBUG,
8650 8650 "rsm_segmap done: segment not connected\n"));
8651 8651 return (ENODEV);
8652 8652 }
8653 8653
8654 8654 /*
8655 8655 * Make sure we are not mapping a larger segment than what's
8656 8656 * exported
8657 8657 */
8658 8658 if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8659 8659 rsmseglock_release(seg);
8660 8660 DBG_PRINTF((category, RSM_DEBUG,
8661 8661 "rsm_segmap done: off+len>seg size\n"));
8662 8662 return (ENXIO);
8663 8663 }
8664 8664
8665 8665 /*
8666 8666 * Make sure we still have permission for the map operation.
8667 8667 */
8668 8668 maxprot = PROT_USER;
8669 8669 if (seg->s_mode & RSM_PERM_READ) {
8670 8670 maxprot |= PROT_READ;
8671 8671 }
8672 8672
8673 8673 if (seg->s_mode & RSM_PERM_WRITE) {
8674 8674 maxprot |= PROT_WRITE;
8675 8675 }
8676 8676
8677 8677 if ((prot & maxprot) != prot) {
8678 8678 /* No permission */
8679 8679 rsmseglock_release(seg);
8680 8680 DBG_PRINTF((category, RSM_DEBUG,
8681 8681 "rsm_segmap done: no permission\n"));
8682 8682 return (EACCES);
8683 8683 }
8684 8684
8685 8685 old_state = seg->s_state;
8686 8686
8687 8687 ASSERT(seg->s_share != NULL);
8688 8688
8689 8689 rsmsharelock_acquire(seg);
8690 8690
8691 8691 sharedp = seg->s_share;
8692 8692
8693 8693 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8694 8694 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8695 8695
8696 8696 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8697 8697 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8698 8698 rsmsharelock_release(seg);
8699 8699 rsmseglock_release(seg);
8700 8700 DBG_PRINTF((category, RSM_DEBUG,
8701 8701 "rsm_segmap done:RSMSI_STATE %d invalid\n",
8702 8702 sharedp->rsmsi_state));
8703 8703 return (ENODEV);
8704 8704 }
8705 8705
8706 8706 /*
8707 8707 * Do the map - since we want importers to share mappings
8708 8708 * we do the rsmpi map for the whole segment
8709 8709 */
8710 8710 if (seg->s_node != my_nodeid) {
8711 8711 uint_t dev_register;
8712 8712 off_t dev_offset;
8713 8713 dev_info_t *dip;
8714 8714 size_t tmp_len;
8715 8715 size_t total_length_mapped = 0;
8716 8716 size_t length_to_map = seg->s_len;
8717 8717 off_t tmp_off = 0;
8718 8718 rsm_mapinfo_t *p;
8719 8719
8720 8720 /*
8721 8721 * length_to_map = seg->s_len is always an integral
8722 8722 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8723 8723 * list is a multiple of PAGESIZE - RSMPI map ensures this
8724 8724 */
8725 8725
8726 8726 adapter = seg->s_adapter;
8727 8727 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8728 8728 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8729 8729
8730 8730 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8731 8731 error = 0;
8732 8732 /* map the whole segment */
8733 8733 while (total_length_mapped < seg->s_len) {
8734 8734 tmp_len = 0;
8735 8735
8736 8736 error = adapter->rsmpi_ops->rsm_map(
8737 8737 seg->s_handle.in, tmp_off,
8738 8738 length_to_map, &tmp_len,
8739 8739 &dip, &dev_register, &dev_offset,
8740 8740 NULL, NULL);
8741 8741
8742 8742 if (error != 0)
8743 8743 break;
8744 8744
8745 8745 /*
8746 8746 * Store the mapping info obtained from rsm_map
8747 8747 */
8748 8748 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8749 8749 p->dev_register = dev_register;
8750 8750 p->dev_offset = dev_offset;
8751 8751 p->dip = dip;
8752 8752 p->individual_len = tmp_len;
8753 8753 p->start_offset = tmp_off;
8754 8754 p->next = sharedp->rsmsi_mapinfo;
8755 8755 sharedp->rsmsi_mapinfo = p;
8756 8756
8757 8757 total_length_mapped += tmp_len;
8758 8758 length_to_map -= tmp_len;
8759 8759 tmp_off += tmp_len;
8760 8760 }
8761 8761 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8762 8762
8763 8763 if (error != RSM_SUCCESS) {
8764 8764 /* Check if this is the the first rsm_map */
8765 8765 if (sharedp->rsmsi_mapinfo != NULL) {
8766 8766 /*
8767 8767 * A single rsm_unmap undoes
8768 8768 * multiple rsm_maps.
8769 8769 */
8770 8770 (void) seg->s_adapter->rsmpi_ops->
8771 8771 rsm_unmap(sharedp->rsmsi_handle);
8772 8772 rsm_free_mapinfo(sharedp->
8773 8773 rsmsi_mapinfo);
8774 8774 }
8775 8775 sharedp->rsmsi_mapinfo = NULL;
8776 8776 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8777 8777 rsmsharelock_release(seg);
8778 8778 rsmseglock_release(seg);
8779 8779 DBG_PRINTF((category, RSM_DEBUG,
8780 8780 "rsm_segmap done: rsmpi map err %d\n",
8781 8781 error));
8782 8782 ASSERT(error != RSMERR_BAD_LENGTH &&
8783 8783 error != RSMERR_BAD_MEM_ALIGNMENT &&
8784 8784 error != RSMERR_BAD_SEG_HNDL);
8785 8785 if (error == RSMERR_UNSUPPORTED_OPERATION)
8786 8786 return (ENOTSUP);
8787 8787 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8788 8788 return (EAGAIN);
8789 8789 else if (error == RSMERR_CONN_ABORTED)
8790 8790 return (ENODEV);
8791 8791 else
8792 8792 return (error);
8793 8793 } else {
8794 8794 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8795 8795 }
8796 8796 } else {
8797 8797 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8798 8798 }
8799 8799
8800 8800 sharedp->rsmsi_mapcnt++;
8801 8801
8802 8802 rsmsharelock_release(seg);
8803 8803
8804 8804 /* move to an intermediate mapping state */
8805 8805 seg->s_state = RSM_STATE_MAPPING;
8806 8806 rsmseglock_release(seg);
8807 8807
8808 8808 error = devmap_setup(dev, (offset_t)off, as, addrp,
8809 8809 len, prot, maxprot, flags, cred);
8810 8810
8811 8811 rsmseglock_acquire(seg);
8812 8812 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8813 8813
8814 8814 if (error == DDI_SUCCESS) {
8815 8815 seg->s_state = RSM_STATE_ACTIVE;
8816 8816 } else {
8817 8817 rsmsharelock_acquire(seg);
8818 8818
8819 8819 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8820 8820
8821 8821 sharedp->rsmsi_mapcnt--;
8822 8822 if (sharedp->rsmsi_mapcnt == 0) {
8823 8823 /* unmap the shared RSMPI mapping */
8824 8824 ASSERT(sharedp->rsmsi_handle != NULL);
8825 8825 (void) adapter->rsmpi_ops->
8826 8826 rsm_unmap(sharedp->rsmsi_handle);
8827 8827 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8828 8828 sharedp->rsmsi_mapinfo = NULL;
8829 8829 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8830 8830 }
8831 8831
8832 8832 rsmsharelock_release(seg);
8833 8833 seg->s_state = old_state;
8834 8834 DBG_PRINTF((category, RSM_ERR,
8835 8835 "rsm: devmap_setup failed %d\n", error));
8836 8836 }
8837 8837 cv_broadcast(&seg->s_cv);
8838 8838 rsmseglock_release(seg);
8839 8839 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8840 8840 error));
8841 8841 return (error);
8842 8842 } else {
8843 8843 /*
8844 8844 * For loopback, the export segment mapping cookie (s_cookie)
8845 8845 * is also used as the s_cookie value for its import segments
8846 8846 * during mapping.
8847 8847 * Note that reference counting for s_cookie of the export
8848 8848 * segment is not required due to the following:
8849 8849 * We never have a case of the export segment being destroyed,
8850 8850 * leaving the import segments with a stale value for the
8851 8851 * s_cookie field, since a force disconnect is done prior to a
8852 8852 * destroy of an export segment. The force disconnect causes
8853 8853 * the s_cookie value to be reset to NULL. Also for the
8854 8854 * rsm_rebind operation, we change the s_cookie value of the
8855 8855 * export segment as well as of all its local (loopback)
8856 8856 * importers.
8857 8857 */
8858 8858 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8859 8859
8860 8860 rsmsharelock_release(seg);
8861 8861 /*
8862 8862 * In order to maintain the lock ordering between the export
8863 8863 * and import segment locks, we need to acquire the export
8864 8864 * segment lock first and only then acquire the import
8865 8865 * segment lock.
8866 8866 * The above is necessary to avoid any deadlock scenarios
8867 8867 * with rsm_rebind which also acquires both the export
8868 8868 * and import segment locks in the above mentioned order.
8869 8869 * Based on code inspection, there seem to be no other
8870 8870 * situations in which both the export and import segment
8871 8871 * locks are acquired either in the same or opposite order
8872 8872 * as mentioned above.
8873 8873 * Thus in order to conform to the above lock order, we
8874 8874 * need to change the state of the import segment to
8875 8875 * RSM_STATE_MAPPING, release the lock. Once this is done we
8876 8876 * can now safely acquire the export segment lock first
8877 8877 * followed by the import segment lock which is as per
8878 8878 * the lock order mentioned above.
8879 8879 */
8880 8880 /* move to an intermediate mapping state */
8881 8881 seg->s_state = RSM_STATE_MAPPING;
8882 8882 rsmseglock_release(seg);
8883 8883
8884 8884 eseg = rsmexport_lookup(seg->s_key);
8885 8885
8886 8886 if (eseg == NULL) {
8887 8887 rsmseglock_acquire(seg);
8888 8888 /*
8889 8889 * Revert to old_state and signal any waiters
8890 8890 * The shared state is not changed
8891 8891 */
8892 8892
8893 8893 seg->s_state = old_state;
8894 8894 cv_broadcast(&seg->s_cv);
8895 8895 rsmseglock_release(seg);
8896 8896 DBG_PRINTF((category, RSM_DEBUG,
8897 8897 "rsm_segmap done: key %d not found\n", seg->s_key));
8898 8898 return (ENODEV);
8899 8899 }
8900 8900
8901 8901 rsmsharelock_acquire(seg);
8902 8902 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8903 8903 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8904 8904
8905 8905 sharedp->rsmsi_mapcnt++;
8906 8906 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8907 8907 rsmsharelock_release(seg);
8908 8908
8909 8909 ASSERT(eseg->s_cookie != NULL);
8910 8910
8911 8911 /*
8912 8912 * It is not required or necessary to acquire the import
8913 8913 * segment lock here to change the value of s_cookie since
8914 8914 * no one will touch the import segment as long as it is
8915 8915 * in the RSM_STATE_MAPPING state.
8916 8916 */
8917 8917 seg->s_cookie = eseg->s_cookie;
8918 8918
8919 8919 rsmseglock_release(eseg);
8920 8920
8921 8921 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8922 8922 prot, maxprot, flags, cred);
8923 8923
8924 8924 rsmseglock_acquire(seg);
8925 8925 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8926 8926 if (error == 0) {
8927 8927 seg->s_state = RSM_STATE_ACTIVE;
8928 8928 } else {
8929 8929 rsmsharelock_acquire(seg);
8930 8930
8931 8931 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8932 8932
8933 8933 sharedp->rsmsi_mapcnt--;
8934 8934 if (sharedp->rsmsi_mapcnt == 0) {
8935 8935 sharedp->rsmsi_mapinfo = NULL;
8936 8936 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8937 8937 }
8938 8938 rsmsharelock_release(seg);
8939 8939 seg->s_state = old_state;
8940 8940 seg->s_cookie = NULL;
8941 8941 }
8942 8942 cv_broadcast(&seg->s_cv);
8943 8943 rsmseglock_release(seg);
8944 8944 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8945 8945 "rsm_segmap done: %d\n", error));
8946 8946 return (error);
8947 8947 }
8948 8948 }
8949 8949
8950 8950 int
8951 8951 rsmka_null_seg_create(
8952 8952 rsm_controller_handle_t argcp,
8953 8953 rsm_memseg_export_handle_t *handle,
8954 8954 size_t size,
8955 8955 uint_t flags,
8956 8956 rsm_memory_local_t *memory,
8957 8957 rsm_resource_callback_t callback,
8958 8958 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8959 8959 {
8960 8960 return (RSM_SUCCESS);
8961 8961 }
8962 8962
8963 8963
8964 8964 int
8965 8965 rsmka_null_seg_destroy(
8966 8966 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
8967 8967 {
8968 8968 return (RSM_SUCCESS);
8969 8969 }
8970 8970
8971 8971
8972 8972 int
8973 8973 rsmka_null_bind(
8974 8974 rsm_memseg_export_handle_t argmemseg,
8975 8975 off_t offset,
8976 8976 rsm_memory_local_t *argmemory,
8977 8977 rsm_resource_callback_t callback,
8978 8978 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8979 8979 {
8980 8980 return (RSM_SUCCESS);
8981 8981 }
8982 8982
8983 8983
8984 8984 int
8985 8985 rsmka_null_unbind(
8986 8986 rsm_memseg_export_handle_t argmemseg,
8987 8987 off_t offset,
8988 8988 size_t length /*ARGSUSED*/)
8989 8989 {
8990 8990 return (DDI_SUCCESS);
8991 8991 }
8992 8992
8993 8993 int
8994 8994 rsmka_null_rebind(
8995 8995 rsm_memseg_export_handle_t argmemseg,
8996 8996 off_t offset,
8997 8997 rsm_memory_local_t *memory,
8998 8998 rsm_resource_callback_t callback,
8999 8999 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9000 9000 {
9001 9001 return (RSM_SUCCESS);
9002 9002 }
9003 9003
9004 9004 int
9005 9005 rsmka_null_publish(
9006 9006 rsm_memseg_export_handle_t argmemseg,
9007 9007 rsm_access_entry_t access_list[],
9008 9008 uint_t access_list_length,
9009 9009 rsm_memseg_id_t segment_id,
9010 9010 rsm_resource_callback_t callback,
9011 9011 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9012 9012 {
9013 9013 return (RSM_SUCCESS);
9014 9014 }
9015 9015
9016 9016
9017 9017 int
9018 9018 rsmka_null_republish(
9019 9019 rsm_memseg_export_handle_t memseg,
9020 9020 rsm_access_entry_t access_list[],
9021 9021 uint_t access_list_length,
9022 9022 rsm_resource_callback_t callback,
9023 9023 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9024 9024 {
9025 9025 return (RSM_SUCCESS);
9026 9026 }
9027 9027
9028 9028 int
9029 9029 rsmka_null_unpublish(
9030 9030 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
9031 9031 {
9032 9032 return (RSM_SUCCESS);
9033 9033 }
9034 9034
9035 9035
9036 9036 void
9037 9037 rsmka_init_loopback()
9038 9038 {
9039 9039 rsm_ops_t *ops = &null_rsmpi_ops;
9040 9040 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9041 9041
9042 9042 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9043 9043 "rsmka_init_loopback enter\n"));
9044 9044
9045 9045 /* initialize null ops vector */
9046 9046 ops->rsm_seg_create = rsmka_null_seg_create;
9047 9047 ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9048 9048 ops->rsm_bind = rsmka_null_bind;
9049 9049 ops->rsm_unbind = rsmka_null_unbind;
9050 9050 ops->rsm_rebind = rsmka_null_rebind;
9051 9051 ops->rsm_publish = rsmka_null_publish;
9052 9052 ops->rsm_unpublish = rsmka_null_unpublish;
9053 9053 ops->rsm_republish = rsmka_null_republish;
9054 9054
9055 9055 /* initialize attributes for loopback adapter */
9056 9056 loopback_attr.attr_name = loopback_str;
9057 9057 loopback_attr.attr_page_size = 0x8; /* 8K */
9058 9058
9059 9059 /* initialize loopback adapter */
9060 9060 loopback_adapter.rsm_attr = loopback_attr;
9061 9061 loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9062 9062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9063 9063 "rsmka_init_loopback done\n"));
9064 9064 }
9065 9065
9066 9066 /* ************** DR functions ********************************** */
9067 9067 static void
9068 9068 rsm_quiesce_exp_seg(rsmresource_t *resp)
9069 9069 {
9070 9070 int recheck_state;
9071 9071 rsmseg_t *segp = (rsmseg_t *)resp;
9072 9072 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9073 9073 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9074 9074
9075 9075 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9076 9076 "%s enter: key=%u\n", function, segp->s_key));
9077 9077
9078 9078 rsmseglock_acquire(segp);
9079 9079 do {
9080 9080 recheck_state = 0;
9081 9081 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9082 9082 (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9083 9083 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9084 9084 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9085 9085 rsmseglock_release(segp);
9086 9086 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9087 9087 "%s done:state =%d\n", function,
9088 9088 segp->s_state));
9089 9089 return;
9090 9090 }
9091 9091
9092 9092 if (segp->s_state == RSM_STATE_NEW) {
9093 9093 segp->s_state = RSM_STATE_NEW_QUIESCED;
9094 9094 rsmseglock_release(segp);
9095 9095 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9096 9096 "%s done:state =%d\n", function,
9097 9097 segp->s_state));
9098 9098 return;
9099 9099 }
9100 9100
9101 9101 if (segp->s_state == RSM_STATE_BIND) {
9102 9102 /* unbind */
9103 9103 (void) rsm_unbind_pages(segp);
9104 9104 segp->s_state = RSM_STATE_BIND_QUIESCED;
9105 9105 rsmseglock_release(segp);
9106 9106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9107 9107 "%s done:state =%d\n", function,
9108 9108 segp->s_state));
9109 9109 return;
9110 9110 }
9111 9111
9112 9112 if (segp->s_state == RSM_STATE_EXPORT) {
9113 9113 /*
9114 9114 * wait for putv/getv to complete if the segp is
9115 9115 * a local memory handle
9116 9116 */
9117 9117 while ((segp->s_state == RSM_STATE_EXPORT) &&
9118 9118 (segp->s_rdmacnt != 0)) {
9119 9119 cv_wait(&segp->s_cv, &segp->s_lock);
9120 9120 }
9121 9121
9122 9122 if (segp->s_state != RSM_STATE_EXPORT) {
9123 9123 /*
9124 9124 * state changed need to see what it
9125 9125 * should be changed to.
9126 9126 */
9127 9127 recheck_state = 1;
9128 9128 continue;
9129 9129 }
9130 9130
9131 9131 segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9132 9132 rsmseglock_release(segp);
9133 9133 /*
9134 9134 * send SUSPEND messages - currently it will be
9135 9135 * done at the end
9136 9136 */
9137 9137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9138 9138 "%s done:state =%d\n", function,
9139 9139 segp->s_state));
9140 9140 return;
9141 9141 }
9142 9142 } while (recheck_state);
9143 9143
9144 9144 rsmseglock_release(segp);
9145 9145 }
9146 9146
9147 9147 static void
9148 9148 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9149 9149 {
9150 9150 int ret;
9151 9151 rsmseg_t *segp = (rsmseg_t *)resp;
9152 9152 rsmapi_access_entry_t *acl;
9153 9153 rsm_access_entry_t *rsmpi_acl;
9154 9154 int acl_len;
9155 9155 int create_flags = 0;
9156 9156 struct buf *xbuf;
9157 9157 rsm_memory_local_t mem;
9158 9158 adapter_t *adapter;
9159 9159 dev_t sdev = 0;
9160 9160 rsm_resource_callback_t callback_flag;
9161 9161 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9162 9162 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9163 9163
9164 9164 rsmseglock_acquire(segp);
9165 9165
9166 9166 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9167 9167 "%s enter: key=%u, state=%d\n", function, segp->s_key,
9168 9168 segp->s_state));
9169 9169
9170 9170 if ((segp->s_state == RSM_STATE_NEW) ||
9171 9171 (segp->s_state == RSM_STATE_BIND) ||
9172 9172 (segp->s_state == RSM_STATE_EXPORT)) {
9173 9173 rsmseglock_release(segp);
9174 9174 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9175 9175 function, segp->s_state));
9176 9176 return;
9177 9177 }
9178 9178
9179 9179 if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9180 9180 segp->s_state = RSM_STATE_NEW;
9181 9181 cv_broadcast(&segp->s_cv);
9182 9182 rsmseglock_release(segp);
9183 9183 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9184 9184 function, segp->s_state));
9185 9185 return;
9186 9186 }
9187 9187
9188 9188 if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9189 9189 /* bind the segment */
9190 9190 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9191 9191 segp->s_len, segp->s_proc);
9192 9192 if (ret == RSM_SUCCESS) { /* bind successful */
9193 9193 segp->s_state = RSM_STATE_BIND;
9194 9194 } else { /* bind failed - resource unavailable */
9195 9195 segp->s_state = RSM_STATE_NEW;
9196 9196 }
9197 9197 cv_broadcast(&segp->s_cv);
9198 9198 rsmseglock_release(segp);
9199 9199 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9200 9200 "%s done: bind_qscd bind = %d\n", function, ret));
9201 9201 return;
9202 9202 }
9203 9203
9204 9204 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9205 9205 /* wait for the segment to move to EXPORT_QUIESCED state */
9206 9206 cv_wait(&segp->s_cv, &segp->s_lock);
9207 9207 }
9208 9208
9209 9209 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9210 9210 /* bind the segment */
9211 9211 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9212 9212 segp->s_len, segp->s_proc);
9213 9213
9214 9214 if (ret != RSM_SUCCESS) {
9215 9215 /* bind failed - resource unavailable */
9216 9216 acl_len = segp->s_acl_len;
9217 9217 acl = segp->s_acl;
9218 9218 rsmpi_acl = segp->s_acl_in;
9219 9219 segp->s_acl_len = 0;
9220 9220 segp->s_acl = NULL;
9221 9221 segp->s_acl_in = NULL;
9222 9222 rsmseglock_release(segp);
9223 9223
9224 9224 rsmexport_rm(segp);
9225 9225 rsmacl_free(acl, acl_len);
9226 9226 rsmpiacl_free(rsmpi_acl, acl_len);
9227 9227
9228 9228 rsmseglock_acquire(segp);
9229 9229 segp->s_state = RSM_STATE_NEW;
9230 9230 cv_broadcast(&segp->s_cv);
9231 9231 rsmseglock_release(segp);
9232 9232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9233 9233 "%s done: exp_qscd bind failed = %d\n",
9234 9234 function, ret));
9235 9235 return;
9236 9236 }
9237 9237 /*
9238 9238 * publish the segment
9239 9239 * if successful
9240 9240 * segp->s_state = RSM_STATE_EXPORT;
9241 9241 * else failed
9242 9242 * segp->s_state = RSM_STATE_BIND;
9243 9243 */
9244 9244
9245 9245 /* check whether it is a local_memory_handle */
9246 9246 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9247 9247 if ((segp->s_acl[0].ae_node == my_nodeid) &&
9248 9248 (segp->s_acl[0].ae_permission == 0)) {
9249 9249 segp->s_state = RSM_STATE_EXPORT;
9250 9250 cv_broadcast(&segp->s_cv);
9251 9251 rsmseglock_release(segp);
9252 9252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9253 9253 "%s done:exp_qscd\n", function));
9254 9254 return;
9255 9255 }
9256 9256 }
9257 9257 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9258 9258 sdev, 0, NULL, DDI_UMEM_SLEEP);
9259 9259 ASSERT(xbuf != NULL);
9260 9260
9261 9261 mem.ms_type = RSM_MEM_BUF;
9262 9262 mem.ms_bp = xbuf;
9263 9263
9264 9264 adapter = segp->s_adapter;
9265 9265
9266 9266 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9267 9267 create_flags = RSM_ALLOW_UNBIND_REBIND;
9268 9268 }
9269 9269
9270 9270 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9271 9271 callback_flag = RSM_RESOURCE_DONTWAIT;
9272 9272 } else {
9273 9273 callback_flag = RSM_RESOURCE_SLEEP;
9274 9274 }
9275 9275
9276 9276 ret = adapter->rsmpi_ops->rsm_seg_create(
9277 9277 adapter->rsmpi_handle, &segp->s_handle.out,
9278 9278 segp->s_len, create_flags, &mem,
9279 9279 callback_flag, NULL);
9280 9280
9281 9281 if (ret != RSM_SUCCESS) {
9282 9282 acl_len = segp->s_acl_len;
9283 9283 acl = segp->s_acl;
9284 9284 rsmpi_acl = segp->s_acl_in;
9285 9285 segp->s_acl_len = 0;
9286 9286 segp->s_acl = NULL;
9287 9287 segp->s_acl_in = NULL;
9288 9288 rsmseglock_release(segp);
9289 9289
9290 9290 rsmexport_rm(segp);
9291 9291 rsmacl_free(acl, acl_len);
9292 9292 rsmpiacl_free(rsmpi_acl, acl_len);
9293 9293
9294 9294 rsmseglock_acquire(segp);
9295 9295 segp->s_state = RSM_STATE_BIND;
9296 9296 cv_broadcast(&segp->s_cv);
9297 9297 rsmseglock_release(segp);
9298 9298 DBG_PRINTF((category, RSM_ERR,
9299 9299 "%s done: exp_qscd create failed = %d\n",
9300 9300 function, ret));
9301 9301 return;
9302 9302 }
9303 9303
9304 9304 ret = adapter->rsmpi_ops->rsm_publish(
9305 9305 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9306 9306 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9307 9307
9308 9308 if (ret != RSM_SUCCESS) {
9309 9309 acl_len = segp->s_acl_len;
9310 9310 acl = segp->s_acl;
9311 9311 rsmpi_acl = segp->s_acl_in;
9312 9312 segp->s_acl_len = 0;
9313 9313 segp->s_acl = NULL;
9314 9314 segp->s_acl_in = NULL;
9315 9315 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9316 9316 rsmseglock_release(segp);
9317 9317
9318 9318 rsmexport_rm(segp);
9319 9319 rsmacl_free(acl, acl_len);
9320 9320 rsmpiacl_free(rsmpi_acl, acl_len);
9321 9321
9322 9322 rsmseglock_acquire(segp);
9323 9323 segp->s_state = RSM_STATE_BIND;
9324 9324 cv_broadcast(&segp->s_cv);
9325 9325 rsmseglock_release(segp);
9326 9326 DBG_PRINTF((category, RSM_ERR,
9327 9327 "%s done: exp_qscd publish failed = %d\n",
9328 9328 function, ret));
9329 9329 return;
9330 9330 }
9331 9331
9332 9332 segp->s_state = RSM_STATE_EXPORT;
9333 9333 cv_broadcast(&segp->s_cv);
9334 9334 rsmseglock_release(segp);
9335 9335 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9336 9336 function));
9337 9337 return;
9338 9338 }
9339 9339
9340 9340 rsmseglock_release(segp);
9341 9341
9342 9342 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9343 9343 }
9344 9344
9345 9345 static void
9346 9346 rsm_quiesce_imp_seg(rsmresource_t *resp)
9347 9347 {
9348 9348 rsmseg_t *segp = (rsmseg_t *)resp;
9349 9349 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9350 9350 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9351 9351
9352 9352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9353 9353 "%s enter: key=%u\n", function, segp->s_key));
9354 9354
9355 9355 rsmseglock_acquire(segp);
9356 9356 segp->s_flags |= RSM_DR_INPROGRESS;
9357 9357
9358 9358 while (segp->s_rdmacnt != 0) {
9359 9359 /* wait for the RDMA to complete */
9360 9360 cv_wait(&segp->s_cv, &segp->s_lock);
9361 9361 }
9362 9362
9363 9363 rsmseglock_release(segp);
9364 9364
9365 9365 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9366 9366
9367 9367 }
9368 9368
9369 9369 static void
9370 9370 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9371 9371 {
9372 9372 rsmseg_t *segp = (rsmseg_t *)resp;
9373 9373 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9374 9374 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9375 9375
9376 9376 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9377 9377 "%s enter: key=%u\n", function, segp->s_key));
9378 9378
9379 9379 rsmseglock_acquire(segp);
9380 9380
9381 9381 segp->s_flags &= ~RSM_DR_INPROGRESS;
9382 9382 /* wake up any waiting putv/getv ops */
9383 9383 cv_broadcast(&segp->s_cv);
9384 9384
9385 9385 rsmseglock_release(segp);
9386 9386
9387 9387 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9388 9388
9389 9389
9390 9390 }
9391 9391
9392 9392 static void
9393 9393 rsm_process_exp_seg(rsmresource_t *resp, int event)
9394 9394 {
9395 9395 if (event == RSM_DR_QUIESCE)
9396 9396 rsm_quiesce_exp_seg(resp);
9397 9397 else /* UNQUIESCE */
9398 9398 rsm_unquiesce_exp_seg(resp);
9399 9399 }
9400 9400
9401 9401 static void
9402 9402 rsm_process_imp_seg(rsmresource_t *resp, int event)
9403 9403 {
9404 9404 if (event == RSM_DR_QUIESCE)
9405 9405 rsm_quiesce_imp_seg(resp);
9406 9406 else /* UNQUIESCE */
9407 9407 rsm_unquiesce_imp_seg(resp);
9408 9408 }
9409 9409
9410 9410 static void
9411 9411 rsm_dr_process_local_segments(int event)
9412 9412 {
9413 9413
9414 9414 int i, j;
9415 9415 rsmresource_blk_t *blk;
9416 9416 rsmresource_t *p;
9417 9417 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9418 9418
9419 9419 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9420 9420 "rsm_dr_process_local_segments enter\n"));
9421 9421
9422 9422 /* iterate through the resource structure */
9423 9423
9424 9424 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9425 9425
9426 9426 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9427 9427 blk = rsm_resource.rsmrc_root[i];
9428 9428 if (blk != NULL) {
9429 9429 for (j = 0; j < RSMRC_BLKSZ; j++) {
9430 9430 p = blk->rsmrcblk_blks[j];
9431 9431 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9432 9432 /* valid resource */
9433 9433 if (p->rsmrc_type ==
9434 9434 RSM_RESOURCE_EXPORT_SEGMENT)
9435 9435 rsm_process_exp_seg(p, event);
9436 9436 else if (p->rsmrc_type ==
9437 9437 RSM_RESOURCE_IMPORT_SEGMENT)
9438 9438 rsm_process_imp_seg(p, event);
9439 9439 }
9440 9440 }
9441 9441 }
9442 9442 }
9443 9443
9444 9444 rw_exit(&rsm_resource.rsmrc_lock);
9445 9445
9446 9446 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9447 9447 "rsm_dr_process_local_segments done\n"));
9448 9448 }
9449 9449
9450 9450 /* *************** DR callback functions ************ */
9451 9451 static void
9452 9452 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9453 9453 {
9454 9454 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9455 9455 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9456 9456 "rsm_dr_callback_post_add is a no-op\n"));
9457 9457 /* Noop */
9458 9458 }
9459 9459
9460 9460 static int
9461 9461 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9462 9462 {
9463 9463 int recheck_state = 0;
9464 9464 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9465 9465
9466 9466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9467 9467 "rsm_dr_callback_pre_del enter\n"));
9468 9468
9469 9469 mutex_enter(&rsm_drv_data.drv_lock);
9470 9470
9471 9471 do {
9472 9472 recheck_state = 0;
9473 9473 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9474 9474 "rsm_dr_callback_pre_del:state=%d\n",
9475 9475 rsm_drv_data.drv_state));
9476 9476
9477 9477 switch (rsm_drv_data.drv_state) {
9478 9478 case RSM_DRV_NEW:
9479 9479 /*
9480 9480 * The state should usually never be RSM_DRV_NEW
9481 9481 * since in this state the callbacks have not yet
9482 9482 * been registered. So, ASSERT.
9483 9483 */
9484 9484 ASSERT(0);
9485 9485 return (0);
9486 9486 case RSM_DRV_REG_PROCESSING:
9487 9487 /*
9488 9488 * The driver is in the process of registering
9489 9489 * with the DR framework. So, wait till the
9490 9490 * registration process is complete.
9491 9491 */
9492 9492 recheck_state = 1;
9493 9493 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9494 9494 break;
9495 9495 case RSM_DRV_UNREG_PROCESSING:
9496 9496 /*
9497 9497 * If the state is RSM_DRV_UNREG_PROCESSING, the
9498 9498 * module is in the process of detaching and
9499 9499 * unregistering the callbacks from the DR
9500 9500 * framework. So, simply return.
9501 9501 */
9502 9502 mutex_exit(&rsm_drv_data.drv_lock);
9503 9503 DBG_PRINTF((category, RSM_DEBUG,
9504 9504 "rsm_dr_callback_pre_del:"
9505 9505 "pre-del on NEW/UNREG\n"));
9506 9506 return (0);
9507 9507 case RSM_DRV_OK:
9508 9508 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9509 9509 break;
9510 9510 case RSM_DRV_PREDEL_STARTED:
9511 9511 /* FALLTHRU */
9512 9512 case RSM_DRV_PREDEL_COMPLETED:
9513 9513 /* FALLTHRU */
9514 9514 case RSM_DRV_POSTDEL_IN_PROGRESS:
9515 9515 recheck_state = 1;
9516 9516 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9517 9517 break;
9518 9518 case RSM_DRV_DR_IN_PROGRESS:
9519 9519 rsm_drv_data.drv_memdel_cnt++;
9520 9520 mutex_exit(&rsm_drv_data.drv_lock);
9521 9521 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9522 9522 "rsm_dr_callback_pre_del done\n"));
9523 9523 return (0);
9524 9524 /* break; */
9525 9525 default:
9526 9526 ASSERT(0);
9527 9527 break;
9528 9528 }
9529 9529
9530 9530 } while (recheck_state);
9531 9531
9532 9532 rsm_drv_data.drv_memdel_cnt++;
9533 9533
9534 9534 mutex_exit(&rsm_drv_data.drv_lock);
9535 9535
9536 9536 /* Do all the quiescing stuff here */
9537 9537 DBG_PRINTF((category, RSM_DEBUG,
9538 9538 "rsm_dr_callback_pre_del: quiesce things now\n"));
9539 9539
9540 9540 rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9541 9541
9542 9542 /*
9543 9543 * now that all local segments have been quiesced lets inform
9544 9544 * the importers
9545 9545 */
9546 9546 rsm_send_suspend();
9547 9547
9548 9548 /*
9549 9549 * In response to the suspend message the remote node(s) will process
9550 9550 * the segments and send a suspend_complete message. Till all
9551 9551 * the nodes send the suspend_complete message we wait in the
9552 9552 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9553 9553 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9554 9554 */
9555 9555 mutex_enter(&rsm_drv_data.drv_lock);
9556 9556
9557 9557 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9558 9558 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9559 9559 }
9560 9560
9561 9561 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9562 9562
9563 9563 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9564 9564 cv_broadcast(&rsm_drv_data.drv_cv);
9565 9565
9566 9566 mutex_exit(&rsm_drv_data.drv_lock);
9567 9567
9568 9568 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9569 9569 "rsm_dr_callback_pre_del done\n"));
9570 9570
9571 9571 return (0);
9572 9572 }
9573 9573
9574 9574 static void
9575 9575 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9576 9576 {
9577 9577 int recheck_state = 0;
9578 9578 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9579 9579
9580 9580 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9581 9581 "rsm_dr_callback_post_del enter\n"));
9582 9582
9583 9583 mutex_enter(&rsm_drv_data.drv_lock);
9584 9584
9585 9585 do {
9586 9586 recheck_state = 0;
9587 9587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9588 9588 "rsm_dr_callback_post_del:state=%d\n",
9589 9589 rsm_drv_data.drv_state));
9590 9590
9591 9591 switch (rsm_drv_data.drv_state) {
9592 9592 case RSM_DRV_NEW:
9593 9593 /*
9594 9594 * The driver state cannot not be RSM_DRV_NEW
9595 9595 * since in this state the callbacks have not
9596 9596 * yet been registered.
9597 9597 */
9598 9598 ASSERT(0);
9599 9599 return;
9600 9600 case RSM_DRV_REG_PROCESSING:
9601 9601 /*
9602 9602 * The driver is in the process of registering with
9603 9603 * the DR framework. Wait till the registration is
9604 9604 * complete.
9605 9605 */
9606 9606 recheck_state = 1;
9607 9607 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9608 9608 break;
9609 9609 case RSM_DRV_UNREG_PROCESSING:
9610 9610 /*
9611 9611 * RSM_DRV_UNREG_PROCESSING state means the module
9612 9612 * is detaching and unregistering the callbacks
9613 9613 * from the DR framework. So simply return.
9614 9614 */
9615 9615 /* FALLTHRU */
9616 9616 case RSM_DRV_OK:
9617 9617 /*
9618 9618 * RSM_DRV_OK means we missed the pre-del
9619 9619 * corresponding to this post-del coz we had not
9620 9620 * registered yet, so simply return.
9621 9621 */
9622 9622 mutex_exit(&rsm_drv_data.drv_lock);
9623 9623 DBG_PRINTF((category, RSM_DEBUG,
9624 9624 "rsm_dr_callback_post_del:"
9625 9625 "post-del on OK/UNREG\n"));
9626 9626 return;
9627 9627 /* break; */
9628 9628 case RSM_DRV_PREDEL_STARTED:
9629 9629 /* FALLTHRU */
9630 9630 case RSM_DRV_PREDEL_COMPLETED:
9631 9631 /* FALLTHRU */
9632 9632 case RSM_DRV_POSTDEL_IN_PROGRESS:
9633 9633 recheck_state = 1;
9634 9634 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9635 9635 break;
9636 9636 case RSM_DRV_DR_IN_PROGRESS:
9637 9637 rsm_drv_data.drv_memdel_cnt--;
9638 9638 if (rsm_drv_data.drv_memdel_cnt > 0) {
9639 9639 mutex_exit(&rsm_drv_data.drv_lock);
9640 9640 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9641 9641 "rsm_dr_callback_post_del done:\n"));
9642 9642 return;
9643 9643 }
9644 9644 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9645 9645 break;
9646 9646 default:
9647 9647 ASSERT(0);
9648 9648 return;
9649 9649 /* break; */
9650 9650 }
9651 9651 } while (recheck_state);
9652 9652
9653 9653 mutex_exit(&rsm_drv_data.drv_lock);
9654 9654
9655 9655 /* Do all the unquiescing stuff here */
9656 9656 DBG_PRINTF((category, RSM_DEBUG,
9657 9657 "rsm_dr_callback_post_del: unquiesce things now\n"));
9658 9658
9659 9659 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9660 9660
9661 9661 /*
9662 9662 * now that all local segments have been unquiesced lets inform
9663 9663 * the importers
9664 9664 */
9665 9665 rsm_send_resume();
9666 9666
9667 9667 mutex_enter(&rsm_drv_data.drv_lock);
9668 9668
9669 9669 rsm_drv_data.drv_state = RSM_DRV_OK;
9670 9670
9671 9671 cv_broadcast(&rsm_drv_data.drv_cv);
9672 9672
9673 9673 mutex_exit(&rsm_drv_data.drv_lock);
9674 9674
9675 9675 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9676 9676 "rsm_dr_callback_post_del done\n"));
9677 9677
9678 9678 return;
9679 9679
9680 9680 }
↓ open down ↓ |
2383 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX