5255 uts shouldn't open-code ISP2
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_qp.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_qp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * tavor_qp.c
29 29 * Tavor Queue Pair Processing Routines
30 30 *
31 31 * Implements all the routines necessary for allocating, freeing, and
32 32 * querying the Tavor queue pairs.
33 33 */
34 34
35 35 #include <sys/types.h>
36 36 #include <sys/conf.h>
37 37 #include <sys/ddi.h>
38 38 #include <sys/sunddi.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/bitmap.h>
41 41 #include <sys/sysmacros.h>
42 42
43 43 #include <sys/ib/adapters/tavor/tavor.h>
44 44 #include <sys/ib/ib_pkt_hdrs.h>
45 45
46 46 static int tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp,
47 47 tavor_rsrc_t *qpc);
48 48 static int tavor_qpn_avl_compare(const void *q, const void *e);
49 49 static int tavor_special_qp_rsrc_alloc(tavor_state_t *state,
50 50 ibt_sqp_type_t type, uint_t port, tavor_rsrc_t **qp_rsrc);
51 51 static int tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
52 52 uint_t port);
53 53 static void tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
54 54 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
55 55
56 56 /*
57 57 * tavor_qp_alloc()
58 58 * Context: Can be called only from user or kernel context.
59 59 */
60 60 int
61 61 tavor_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
62 62 uint_t sleepflag, tavor_qp_options_t *op)
63 63 {
64 64 tavor_rsrc_pool_info_t *rsrc_pool;
65 65 tavor_rsrc_t *qpc, *rsrc, *rdb;
66 66 tavor_umap_db_entry_t *umapdb;
67 67 tavor_qphdl_t qp;
68 68 ibt_qp_alloc_attr_t *attr_p;
69 69 ibt_qp_type_t type;
70 70 ibtl_qp_hdl_t ibt_qphdl;
71 71 ibt_chan_sizes_t *queuesz_p;
72 72 ib_qpn_t *qpn;
73 73 tavor_qphdl_t *qphdl;
74 74 ibt_mr_attr_t mr_attr;
75 75 tavor_mr_options_t mr_op;
76 76 tavor_srqhdl_t srq;
77 77 tavor_pdhdl_t pd;
78 78 tavor_cqhdl_t sq_cq, rq_cq;
79 79 tavor_mrhdl_t mr;
80 80 uint64_t value, qp_desc_off;
81 81 uint32_t *sq_buf, *rq_buf;
82 82 uint32_t log_qp_sq_size, log_qp_rq_size;
83 83 uint32_t sq_size, rq_size;
84 84 uint32_t sq_wqe_size, rq_wqe_size;
85 85 uint32_t max_rdb, max_sgl, uarpg;
86 86 uint_t wq_location, dma_xfer_mode, qp_is_umap;
87 87 uint_t qp_srq_en;
88 88 int status, flag;
89 89 char *errormsg;
90 90
91 91 TAVOR_TNF_ENTER(tavor_qp_alloc);
92 92
93 93 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
94 94
95 95 /*
96 96 * Check the "options" flag. Currently this flag tells the driver
 97  97	 * whether or not the QP's work queues should come from normal
98 98 * system memory or whether they should be allocated from DDR memory.
99 99 */
100 100 if (op == NULL) {
101 101 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
102 102 } else {
103 103 wq_location = op->qpo_wq_loc;
104 104 }
105 105
106 106 /*
107 107 * Extract the necessary info from the tavor_qp_info_t structure
108 108 */
109 109 attr_p = qpinfo->qpi_attrp;
110 110 type = qpinfo->qpi_type;
111 111 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
112 112 queuesz_p = qpinfo->qpi_queueszp;
113 113 qpn = qpinfo->qpi_qpn;
114 114 qphdl = &qpinfo->qpi_qphdl;
115 115
116 116 /*
117 117 * Determine whether QP is being allocated for userland access or
118 118 * whether it is being allocated for kernel access. If the QP is
119 119 * being allocated for userland access, then lookup the UAR doorbell
120 120 * page number for the current process. Note: If this is not found
121 121 * (e.g. if the process has not previously open()'d the Tavor driver),
122 122 * then an error is returned.
123 123 */
124 124 qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 1 : 0;
125 125 if (qp_is_umap) {
126 126 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
127 127 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
128 128 if (status != DDI_SUCCESS) {
129 129 /* Set "status" and "errormsg" and goto failure */
130 130 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
131 131 goto qpalloc_fail;
132 132 }
133 133 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
134 134 }
135 135
136 136 /*
137 137 * Determine whether QP is being associated with an SRQ
138 138 */
139 139 qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
140 140 if (qp_srq_en) {
141 141 /*
142 142 * Check for valid SRQ handle pointers
143 143 */
144 144 if (attr_p->qp_ibc_srq_hdl == NULL) {
145 145 /* Set "status" and "errormsg" and goto failure */
146 146 TAVOR_TNF_FAIL(IBT_SRQ_HDL_INVALID,
147 147 "invalid SRQ handle");
148 148 goto qpalloc_fail;
149 149 }
150 150 srq = (tavor_srqhdl_t)attr_p->qp_ibc_srq_hdl;
151 151 }
152 152
153 153 /*
154 154 * Check for valid QP service type (only UD/RC/UC supported)
155 155 */
156 156 if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
157 157 (type != IBT_UC_RQP))) {
158 158 /* Set "status" and "errormsg" and goto failure */
159 159 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid serv type");
160 160 goto qpalloc_fail;
161 161 }
162 162
163 163 /*
164 164 * Only RC is supported on an SRQ -- This is a Tavor hardware
165 165 * limitation. Arbel native mode will not have this shortcoming.
166 166 */
167 167 if (qp_srq_en && type != IBT_RC_RQP) {
168 168 /* Set "status" and "errormsg" and goto failure */
169 169 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid serv type with SRQ");
170 170 goto qpalloc_fail;
171 171 }
172 172
173 173 /*
174 174 * Check for valid PD handle pointer
175 175 */
176 176 if (attr_p->qp_pd_hdl == NULL) {
177 177 /* Set "status" and "errormsg" and goto failure */
178 178 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
179 179 goto qpalloc_fail;
180 180 }
181 181 pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;
182 182
183 183 /*
184 184 * If on an SRQ, check to make sure the PD is the same
185 185 */
186 186 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
187 187 /* Set "status" and "errormsg" and goto failure */
188 188 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
189 189 goto qpalloc_fail;
190 190 }
191 191
192 192 /* Increment the reference count on the protection domain (PD) */
193 193 tavor_pd_refcnt_inc(pd);
194 194
195 195 /*
196 196 * Check for valid CQ handle pointers
197 197 */
198 198 if ((attr_p->qp_ibc_scq_hdl == NULL) ||
199 199 (attr_p->qp_ibc_rcq_hdl == NULL)) {
200 200 /* Set "status" and "errormsg" and goto failure */
201 201 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
202 202 goto qpalloc_fail1;
203 203 }
204 204 sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
205 205 rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
206 206
207 207 /*
208 208 * Increment the reference count on the CQs. One or both of these
209 209 * could return error if we determine that the given CQ is already
210 210 * being used with a special (SMI/GSI) QP.
211 211 */
212 212 status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_NORMAL);
213 213 if (status != DDI_SUCCESS) {
214 214 /* Set "status" and "errormsg" and goto failure */
215 215 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
216 216 goto qpalloc_fail1;
217 217 }
218 218 status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_NORMAL);
219 219 if (status != DDI_SUCCESS) {
220 220 /* Set "status" and "errormsg" and goto failure */
221 221 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
222 222 goto qpalloc_fail2;
223 223 }
224 224
225 225 /*
226 226	 * Allocate a QP context entry.  This will be filled in with all
227 227 * the necessary parameters to define the Queue Pair. Unlike
228 228 * other Tavor hardware resources, ownership is not immediately
229 229 * given to hardware in the final step here. Instead, we must
230 230 * wait until the QP is later transitioned to the "Init" state before
231 231 * passing the QP to hardware. If we fail here, we must undo all
232 232	 * the reference counts (CQ and PD).
233 233 */
234 234 status = tavor_rsrc_alloc(state, TAVOR_QPC, 1, sleepflag, &qpc);
235 235 if (status != DDI_SUCCESS) {
236 236 /* Set "status" and "errormsg" and goto failure */
237 237 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP context");
238 238 goto qpalloc_fail3;
239 239 }
240 240
241 241 /*
242 242 * Allocate the software structure for tracking the queue pair
243 243 * (i.e. the Tavor Queue Pair handle). If we fail here, we must
244 244 * undo the reference counts and the previous resource allocation.
245 245 */
246 246 status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
247 247 if (status != DDI_SUCCESS) {
248 248 /* Set "status" and "errormsg" and goto failure */
249 249 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP handle");
250 250 goto qpalloc_fail4;
251 251 }
252 252 qp = (tavor_qphdl_t)rsrc->tr_addr;
253 253 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
254 254
255 255 /*
256 256 * Calculate the QP number from QPC index. This routine handles
257 257 * all of the operations necessary to keep track of used, unused,
258 258 * and released QP numbers.
259 259 */
260 260 status = tavor_qp_create_qpn(state, qp, qpc);
261 261 if (status != DDI_SUCCESS) {
262 262 /* Set "status" and "errormsg" and goto failure */
263 263 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QPN create");
264 264 goto qpalloc_fail5;
265 265 }
266 266
267 267 /*
268 268 * If this will be a user-mappable QP, then allocate an entry for
269 269 * the "userland resources database". This will later be added to
270 270 * the database (after all further QP operations are successful).
271 271 * If we fail here, we must undo the reference counts and the
272 272 * previous resource allocation.
273 273 */
274 274 if (qp_is_umap) {
275 275 umapdb = tavor_umap_db_alloc(state->ts_instance, qp->qp_qpnum,
276 276 MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
277 277 if (umapdb == NULL) {
278 278 /* Set "status" and "errormsg" and goto failure */
279 279 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
280 280 goto qpalloc_fail6;
281 281 }
282 282 }
283 283
284 284 /*
285 285 * If this is an RC QP, then pre-allocate the maximum number of RDB
286 286 * entries. This allows us to ensure that we can later cover all
287 287 * the resources needed by hardware for handling multiple incoming
288 288 * RDMA Reads. Note: These resources are obviously not always
289 289 * necessary. They are allocated here anyway. Someday maybe this
290 290 * can be modified to allocate these on-the-fly (i.e. only if RDMA
291 291 * Read or Atomic operations are enabled) XXX
292 292 * If we fail here, we have a bunch of resource and reference count
293 293 * cleanup to do.
294 294 */
295 295 if (type == IBT_RC_RQP) {
296 296 max_rdb = state->ts_cfg_profile->cp_hca_max_rdma_in_qp;
297 297 status = tavor_rsrc_alloc(state, TAVOR_RDB, max_rdb,
298 298 sleepflag, &rdb);
299 299 if (status != DDI_SUCCESS) {
300 300 /* Set "status" and "errormsg" and goto failure */
301 301 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed RDB");
302 302 goto qpalloc_fail7;
303 303 }
304 304 qp->qp_rdbrsrcp = rdb;
305 305 /* Calculate offset (into DDR memory) of RDB entries */
306 306 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB];
307 307 qp->qp_rdb_ddraddr = (uintptr_t)rsrc_pool->rsrc_ddr_offset +
308 308 (rdb->tr_indx << TAVOR_RDB_SIZE_SHIFT);
309 309 }
310 310
311 311 /*
312 312 * Calculate the appropriate size for the work queues.
313 313 * Note: All Tavor QP work queues must be a power-of-2 in size. Also
314 314 * they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is
315 315 * to round the requested size up to the next highest power-of-2
316 316 */
317 317 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
318 318 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
319 319 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
320 - if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) {
320 + if (ISP2(attr_p->qp_sizes.cs_sq)) {
321 321 log_qp_sq_size = log_qp_sq_size - 1;
322 322 }
323 323 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
324 - if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
324 + if (ISP2(attr_p->qp_sizes.cs_rq)) {
325 325 log_qp_rq_size = log_qp_rq_size - 1;
326 326 }
327 327
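
The change above is the substance of 5255: the open-coded power-of-two test "(x & (x - 1)) == 0" is replaced with the ISP2() macro from <sys/sysmacros.h> (already included at the top of this file), which expands to the same expression. The same substitution appears again below in tavor_special_qp_alloc(). A minimal sketch of the rounding idiom, assuming the usual illumos highbit() semantics (for x > 0, highbit(x) returns floor(log2(x)) + 1); the names sz and log_sz are illustrative only:

	uint32_t log_sz;

	log_sz = highbit(sz);			/* floor(log2(sz)) + 1, sz > 0 */
	if (ISP2(sz))
		log_sz = log_sz - 1;		/* sz is already a power of two */
	/*
	 * (1 << log_sz) is now sz rounded up to the next power of two,
	 * e.g. sz = 5 gives log_sz = 3 (8); sz = 8 gives log_sz = 3 (8).
	 */
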
328 328 /*
329 329 * Next we verify that the rounded-up size is valid (i.e. consistent
330 330 * with the device limits and/or software-configured limits). If not,
331 331 * then obviously we have a lot of cleanup to do before returning.
332 332 */
333 333 if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
334 334 (!qp_srq_en && (log_qp_rq_size >
335 335 state->ts_cfg_profile->cp_log_max_qp_sz))) {
336 336 /* Set "status" and "errormsg" and goto failure */
337 337 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max QP size");
338 338 goto qpalloc_fail8;
339 339 }
340 340
341 341 /*
342 342	 * Next we verify that the requested number of SGLs is valid (i.e.
343 343 * consistent with the device limits and/or software-configured
344 344 * limits). If not, then obviously the same cleanup needs to be done.
345 345 */
346 346 max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
347 347 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
348 348 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_sgl))) {
349 349 /* Set "status" and "errormsg" and goto failure */
350 350 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max QP SGL");
351 351 goto qpalloc_fail8;
352 352 }
353 353
354 354 /*
355 355 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
356 356 * This will depend on the requested number of SGLs. Note: this
357 357 * has the side-effect of also calculating the real number of SGLs
358 358 * (for the calculated WQE size).
359 359 *
360 360 * For QP's on an SRQ, we set these to 0.
361 361 */
362 362 if (qp_srq_en) {
363 363 qp->qp_rq_log_wqesz = 0;
364 364 qp->qp_rq_sgl = 0;
365 365 } else {
366 366 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
367 367 TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz,
368 368 &qp->qp_rq_sgl);
369 369 }
370 370 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
371 371 TAVOR_QP_WQ_TYPE_SENDQ, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
372 372
373 373 /*
374 374 * Allocate the memory for QP work queues. Note: The location from
375 375 * which we will allocate these work queues has been passed in
376 376 * through the tavor_qp_options_t structure. Since Tavor work queues
377 377 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
378 378 * the work queue memory is very important. We used to allocate
379 379 * work queues (the combined receive and send queues) so that they
380 380 * would be aligned on their combined size. That alignment guaranteed
381 381 * that they would never cross the 4GB boundary (Tavor work queues
382 382 * are on the order of MBs at maximum). Now we are able to relax
383 383 * this alignment constraint by ensuring that the IB address assigned
384 384 * to the queue memory (as a result of the tavor_mr_register() call)
385 385 * is offset from zero.
386 386 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
387 387 * guarantee the alignment, but when attempting to use IOMMU bypass
388 388 * mode we found that we were not allowed to specify any alignment
389 389 * that was more restrictive than the system page size.
390 390 * So we avoided this constraint by passing two alignment values,
391 391 * one for the memory allocation itself and the other for the DMA
392 392 * handle (for later bind). This used to cause more memory than
393 393 * necessary to be allocated (in order to guarantee the more
394 394	 * restrictive alignment constraint).  But by guaranteeing the
395 395 * zero-based IB virtual address for the queue, we are able to
396 396 * conserve this memory.
397 397 * Note: If QP is not user-mappable, then it may come from either
398 398 * kernel system memory or from HCA-attached local DDR memory.
399 399 */
400 400 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
401 401 sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
402 402
403 403 /* QP on SRQ sets these to 0 */
404 404 if (qp_srq_en) {
405 405 rq_wqe_size = 0;
406 406 rq_size = 0;
407 407 } else {
408 408 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
409 409 rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
410 410 }
411 411
412 412 qp->qp_wqinfo.qa_size = sq_size + rq_size;
413 413 qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
414 414 qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size);
415 415 if (qp_is_umap) {
416 416 qp->qp_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
417 417 } else {
418 418 qp->qp_wqinfo.qa_location = wq_location;
419 419 }
420 420 status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
421 421 if (status != DDI_SUCCESS) {
422 422 /* Set "status" and "errormsg" and goto failure */
423 423 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed work queue");
424 424 goto qpalloc_fail8;
425 425 }
426 426 if (sq_wqe_size > rq_wqe_size) {
427 427 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
428 428
429 429 /*
430 430 * If QP's on an SRQ, we set the rq_buf to NULL
431 431 */
432 432 if (qp_srq_en)
433 433 rq_buf = NULL;
434 434 else
435 435 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
436 436 } else {
437 437 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
438 438 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
439 439 }
440 440
441 441 /*
442 442 * Register the memory for the QP work queues. The memory for the
443 443 * QP must be registered in the Tavor TPT tables. This gives us the
444 444 * LKey to specify in the QP context later. Note: The memory for
445 445 * Tavor work queues (both Send and Recv) must be contiguous and
446 446 * registered as a single memory region. Note also: If the work
447 447 * queue is to be allocated from DDR memory, then only a "bypass"
448 448 * mapping is appropriate. And if the QP memory is user-mappable,
449 449 * then we force DDI_DMA_CONSISTENT mapping.
450 450 * Also, in order to meet the alignment restriction, we pass the
451 451 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
452 452 * This guarantees that the resulting IB vaddr will be zero-based
453 453 * (modulo the offset into the first page).
454 454 * If we fail here, we still have the bunch of resource and reference
455 455 * count cleanup to do.
456 456 */
457 457 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
458 458 IBT_MR_NOSLEEP;
459 459 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
460 460 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
461 461 mr_attr.mr_as = NULL;
462 462 mr_attr.mr_flags = flag;
463 463 if (qp_is_umap) {
464 464 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
465 465 } else {
466 466 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
467 467 mr_op.mro_bind_type =
468 468 state->ts_cfg_profile->cp_iommu_bypass;
469 469 dma_xfer_mode =
470 470 state->ts_cfg_profile->cp_streaming_consistent;
471 471 if (dma_xfer_mode == DDI_DMA_STREAMING) {
472 472 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
473 473 }
474 474 } else {
475 475 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
476 476 }
477 477 }
478 478 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
479 479 mr_op.mro_bind_override_addr = 1;
480 480 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
481 481 if (status != DDI_SUCCESS) {
482 482 /* Set "status" and "errormsg" and goto failure */
483 483 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
484 484 goto qpalloc_fail9;
485 485 }
486 486
487 487 /*
488 488 * Calculate the offset between the kernel virtual address space
489 489 * and the IB virtual address space. This will be used when
490 490 * posting work requests to properly initialize each WQE.
491 491 */
492 492 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
493 493 (uint64_t)mr->mr_bindinfo.bi_addr;
494 494
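
Here qp_desc_off is the delta between the kernel virtual address of the work queue buffer and the zero-based IB virtual address produced by the mro_bind_override_addr registration above, so converting a WQE's kernel virtual address into the IB virtual address the hardware expects is a single subtraction. A minimal illustration (wqe_kva is a hypothetical pointer into qa_buf_aligned, not the driver's actual posting code):

	uint64_t wqe_ib_addr;

	/* IB vaddr = kernel vaddr minus the (kernel - IB) offset */
	wqe_ib_addr = (uint64_t)(uintptr_t)wqe_kva - qp_desc_off;
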
495 495 /*
496 496 * Fill in all the return arguments (if necessary). This includes
497 497 * real work queue sizes, real SGLs, and QP number
498 498 */
499 499 if (queuesz_p != NULL) {
500 500 queuesz_p->cs_sq = (1 << log_qp_sq_size);
501 501 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
502 502
503 503		/* QP on an SRQ sets these to 0 */
504 504 if (qp_srq_en) {
505 505 queuesz_p->cs_rq = 0;
506 506 queuesz_p->cs_rq_sgl = 0;
507 507 } else {
508 508 queuesz_p->cs_rq = (1 << log_qp_rq_size);
509 509 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
510 510 }
511 511 }
512 512 if (qpn != NULL) {
513 513 *qpn = (ib_qpn_t)qp->qp_qpnum;
514 514 }
515 515
516 516 /*
517 517 * Fill in the rest of the Tavor Queue Pair handle. We can update
518 518 * the following fields for use in further operations on the QP.
519 519 */
520 520 qp->qp_qpcrsrcp = qpc;
521 521 qp->qp_rsrcp = rsrc;
522 522 qp->qp_state = TAVOR_QP_RESET;
523 523 qp->qp_pdhdl = pd;
524 524 qp->qp_mrhdl = mr;
525 525 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
526 526 TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
527 527 qp->qp_is_special = 0;
528 528 qp->qp_is_umap = qp_is_umap;
529 529 qp->qp_uarpg = (qp->qp_is_umap) ? uarpg : 0;
530 530 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
531 531 qp->qp_sq_cqhdl = sq_cq;
532 532 qp->qp_sq_lastwqeaddr = NULL;
533 533 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
534 534 qp->qp_sq_buf = sq_buf;
535 535 qp->qp_desc_off = qp_desc_off;
536 536 qp->qp_rq_cqhdl = rq_cq;
537 537 qp->qp_rq_lastwqeaddr = NULL;
538 538 qp->qp_rq_buf = rq_buf;
539 539
540 540 /* QP on an SRQ sets this to 0 */
541 541 if (qp_srq_en) {
542 542 qp->qp_rq_bufsz = 0;
543 543 } else {
544 544 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
545 545 }
546 546
547 547 qp->qp_forward_sqd_event = 0;
548 548 qp->qp_sqd_still_draining = 0;
549 549 qp->qp_hdlrarg = (void *)ibt_qphdl;
550 550 qp->qp_mcg_refcnt = 0;
551 551
552 552 /*
553 553 * If this QP is to be associated with an SRQ, then set the SRQ handle
554 554 * appropriately.
555 555 */
556 556 if (qp_srq_en) {
557 557 qp->qp_srqhdl = srq;
558 558 qp->qp_srq_en = TAVOR_QP_SRQ_ENABLED;
559 559 tavor_srq_refcnt_inc(qp->qp_srqhdl);
560 560 } else {
561 561 qp->qp_srqhdl = NULL;
562 562 qp->qp_srq_en = TAVOR_QP_SRQ_DISABLED;
563 563 }
564 564
565 565 /* Determine if later ddi_dma_sync will be necessary */
566 566 qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);
567 567
568 568 /* Determine the QP service type */
569 569 if (type == IBT_RC_RQP) {
570 570 qp->qp_serv_type = TAVOR_QP_RC;
571 571 } else if (type == IBT_UD_RQP) {
572 572 qp->qp_serv_type = TAVOR_QP_UD;
573 573 } else {
574 574 qp->qp_serv_type = TAVOR_QP_UC;
575 575 }
576 576
577 577 /* Zero out the QP context */
578 578 bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));
579 579
580 580 /*
581 581 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the
582 582 * "qphdl" and return success
583 583 */
584 584 ASSERT(state->ts_qphdl[qpc->tr_indx] == NULL);
585 585 state->ts_qphdl[qpc->tr_indx] = qp;
586 586
587 587 /*
588 588 * If this is a user-mappable QP, then we need to insert the previously
589 589 * allocated entry into the "userland resources database". This will
590 590 * allow for later lookup during devmap() (i.e. mmap()) calls.
591 591 */
592 592 if (qp_is_umap) {
593 593 tavor_umap_db_add(umapdb);
594 594 }
595 595
596 596 *qphdl = qp;
597 597
598 598 TAVOR_TNF_EXIT(tavor_qp_alloc);
599 599 return (DDI_SUCCESS);
600 600
601 601 /*
602 602 * The following is cleanup for all possible failure cases in this routine
603 603 */
604 604 qpalloc_fail9:
605 605 tavor_queue_free(state, &qp->qp_wqinfo);
606 606 qpalloc_fail8:
607 607 if (type == IBT_RC_RQP) {
608 608 tavor_rsrc_free(state, &rdb);
609 609 }
610 610 qpalloc_fail7:
611 611 if (qp_is_umap) {
612 612 tavor_umap_db_free(umapdb);
613 613 }
614 614 qpalloc_fail6:
615 615 /*
616 616 * Releasing the QPN will also free up the QPC context. Update
617 617 * the QPC context pointer to indicate this.
618 618 */
619 619 tavor_qp_release_qpn(state, qp->qp_qpn_hdl, TAVOR_QPN_RELEASE);
620 620 qpc = NULL;
621 621 qpalloc_fail5:
622 622 tavor_rsrc_free(state, &rsrc);
623 623 qpalloc_fail4:
624 624 if (qpc) {
625 625 tavor_rsrc_free(state, &qpc);
626 626 }
627 627 qpalloc_fail3:
628 628 tavor_cq_refcnt_dec(rq_cq);
629 629 qpalloc_fail2:
630 630 tavor_cq_refcnt_dec(sq_cq);
631 631 qpalloc_fail1:
632 632 tavor_pd_refcnt_dec(pd);
633 633 qpalloc_fail:
634 634 TNF_PROBE_1(tavor_qp_alloc_fail, TAVOR_TNF_ERROR, "",
635 635 tnf_string, msg, errormsg);
636 636 TAVOR_TNF_EXIT(tavor_qp_alloc);
637 637 return (status);
638 638 }
639 639
640 640
641 641
642 642 /*
643 643 * tavor_special_qp_alloc()
644 644 * Context: Can be called only from user or kernel context.
645 645 */
646 646 int
647 647 tavor_special_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
648 648 uint_t sleepflag, tavor_qp_options_t *op)
649 649 {
650 650 tavor_rsrc_t *qpc, *rsrc;
651 651 tavor_qphdl_t qp;
652 652 ibt_qp_alloc_attr_t *attr_p;
653 653 ibt_sqp_type_t type;
654 654 uint8_t port;
655 655 ibtl_qp_hdl_t ibt_qphdl;
656 656 ibt_chan_sizes_t *queuesz_p;
657 657 tavor_qphdl_t *qphdl;
658 658 ibt_mr_attr_t mr_attr;
659 659 tavor_mr_options_t mr_op;
660 660 tavor_pdhdl_t pd;
661 661 tavor_cqhdl_t sq_cq, rq_cq;
662 662 tavor_mrhdl_t mr;
663 663 uint64_t qp_desc_off;
664 664 uint32_t *sq_buf, *rq_buf;
665 665 uint32_t log_qp_sq_size, log_qp_rq_size;
666 666 uint32_t sq_size, rq_size, max_sgl;
667 667 uint32_t sq_wqe_size, rq_wqe_size;
668 668 uint_t wq_location, dma_xfer_mode;
669 669 int status, flag;
670 670 char *errormsg;
671 671
672 672 TAVOR_TNF_ENTER(tavor_special_qp_alloc);
673 673
674 674 /*
675 675 * Check the "options" flag. Currently this flag tells the driver
676 676	 * whether or not the QP's work queues should come from normal
677 677 * system memory or whether they should be allocated from DDR memory.
678 678 */
679 679 if (op == NULL) {
680 680 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
681 681 } else {
682 682 wq_location = op->qpo_wq_loc;
683 683 }
684 684
685 685 /*
686 686 * Extract the necessary info from the tavor_qp_info_t structure
687 687 */
688 688 attr_p = qpinfo->qpi_attrp;
689 689 type = qpinfo->qpi_type;
690 690 port = qpinfo->qpi_port;
691 691 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
692 692 queuesz_p = qpinfo->qpi_queueszp;
693 693 qphdl = &qpinfo->qpi_qphdl;
694 694
695 695 /*
696 696 * Check for valid special QP type (only SMI & GSI supported)
697 697 */
698 698 if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
699 699 /* Set "status" and "errormsg" and goto failure */
700 700 TAVOR_TNF_FAIL(IBT_QP_SPECIAL_TYPE_INVALID, "invalid QP type");
701 701 goto spec_qpalloc_fail;
702 702 }
703 703
704 704 /*
705 705 * Check for valid port number
706 706 */
707 707 if (!tavor_portnum_is_valid(state, port)) {
708 708 /* Set "status" and "errormsg" and goto failure */
709 709 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
710 710 goto spec_qpalloc_fail;
711 711 }
712 712 port = port - 1;
713 713
714 714 /*
715 715 * Check for valid PD handle pointer
716 716 */
717 717 if (attr_p->qp_pd_hdl == NULL) {
718 718 /* Set "status" and "errormsg" and goto failure */
719 719 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
720 720 goto spec_qpalloc_fail;
721 721 }
722 722 pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;
723 723
724 724 /* Increment the reference count on the PD */
725 725 tavor_pd_refcnt_inc(pd);
726 726
727 727 /*
728 728 * Check for valid CQ handle pointers
729 729 */
730 730 if ((attr_p->qp_ibc_scq_hdl == NULL) ||
731 731 (attr_p->qp_ibc_rcq_hdl == NULL)) {
732 732 /* Set "status" and "errormsg" and goto failure */
733 733 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
734 734 goto spec_qpalloc_fail1;
735 735 }
736 736 sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
737 737 rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
738 738
739 739 /*
740 740 * Increment the reference count on the CQs. One or both of these
741 741 * could return error if we determine that the given CQ is already
742 742 * being used with a non-special QP (i.e. a normal QP).
743 743 */
744 744 status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_SPECIAL);
745 745 if (status != DDI_SUCCESS) {
746 746 /* Set "status" and "errormsg" and goto failure */
747 747 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
748 748 goto spec_qpalloc_fail1;
749 749 }
750 750 status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_SPECIAL);
751 751 if (status != DDI_SUCCESS) {
752 752 /* Set "status" and "errormsg" and goto failure */
753 753 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
754 754 goto spec_qpalloc_fail2;
755 755 }
756 756
757 757 /*
758 758 * Allocate the special QP resources. Essentially, this allocation
759 759 * amounts to checking if the request special QP has already been
760 760 * allocated. If successful, the QP context return is an actual
761 761 * QP context that has been "aliased" to act as a special QP of the
762 762 * appropriate type (and for the appropriate port). Just as in
763 763 * tavor_qp_alloc() above, ownership for this QP context is not
764 764 * immediately given to hardware in the final step here. Instead, we
765 765 * wait until the QP is later transitioned to the "Init" state before
766 766 * passing the QP to hardware. If we fail here, we must undo all
767 767	 * the reference counts (CQ and PD).
768 768 */
769 769 status = tavor_special_qp_rsrc_alloc(state, type, port, &qpc);
770 770 if (status != DDI_SUCCESS) {
771 771 /* Set "status" and "errormsg" and goto failure */
772 772 TAVOR_TNF_FAIL(status, "failed special QP rsrc");
773 773 goto spec_qpalloc_fail3;
774 774 }
775 775
776 776 /*
777 777 * Allocate the software structure for tracking the special queue
778 778 * pair (i.e. the Tavor Queue Pair handle). If we fail here, we
779 779 * must undo the reference counts and the previous resource allocation.
780 780 */
781 781 status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
782 782 if (status != DDI_SUCCESS) {
783 783 /* Set "status" and "errormsg" and goto failure */
784 784 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP handle");
785 785 goto spec_qpalloc_fail4;
786 786 }
787 787 qp = (tavor_qphdl_t)rsrc->tr_addr;
788 788 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
789 789
790 790 /*
791 791 * Actual QP number is a combination of the index of the QPC and
792 792 * the port number. This is because the special QP contexts must
793 793 * be allocated two-at-a-time.
794 794 */
795 795 qp->qp_qpnum = qpc->tr_indx + port;
796 796
797 797 /*
798 798 * Calculate the appropriate size for the work queues.
799 799 * Note: All Tavor QP work queues must be a power-of-2 in size. Also
800 800 * they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is
801 801 * to round the requested size up to the next highest power-of-2
802 802 */
803 803 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
804 804 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
805 805 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
806 - if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) {
806 + if (ISP2(attr_p->qp_sizes.cs_sq)) {
807 807 log_qp_sq_size = log_qp_sq_size - 1;
808 808 }
809 809 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
810 - if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
810 + if (ISP2(attr_p->qp_sizes.cs_rq)) {
811 811 log_qp_rq_size = log_qp_rq_size - 1;
812 812 }
813 813
814 814 /*
815 815 * Next we verify that the rounded-up size is valid (i.e. consistent
816 816 * with the device limits and/or software-configured limits). If not,
817 817 * then obviously we have a bit of cleanup to do before returning.
818 818 */
819 819 if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
820 820 (log_qp_rq_size > state->ts_cfg_profile->cp_log_max_qp_sz)) {
821 821 /* Set "status" and "errormsg" and goto failure */
822 822 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max QP size");
823 823 goto spec_qpalloc_fail5;
824 824 }
825 825
826 826 /*
827 827	 * Next we verify that the requested number of SGLs is valid (i.e.
828 828 * consistent with the device limits and/or software-configured
829 829 * limits). If not, then obviously the same cleanup needs to be done.
830 830 */
831 831 max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
832 832 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
833 833 (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
834 834 /* Set "status" and "errormsg" and goto failure */
835 835 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max QP SGL");
836 836 goto spec_qpalloc_fail5;
837 837 }
838 838
839 839 /*
840 840 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
841 841 * This will depend on the requested number of SGLs. Note: this
842 842 * has the side-effect of also calculating the real number of SGLs
843 843 * (for the calculated WQE size).
844 844 */
845 845 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
846 846 TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
847 847 if (type == IBT_SMI_SQP) {
848 848 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
849 849 TAVOR_QP_WQ_TYPE_SENDMLX_QP0, &qp->qp_sq_log_wqesz,
850 850 &qp->qp_sq_sgl);
851 851 } else {
852 852 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
853 853 TAVOR_QP_WQ_TYPE_SENDMLX_QP1, &qp->qp_sq_log_wqesz,
854 854 &qp->qp_sq_sgl);
855 855 }
856 856
857 857 /*
858 858 * Allocate the memory for QP work queues. Note: The location from
859 859 * which we will allocate these work queues has been passed in
860 860 * through the tavor_qp_options_t structure. Since Tavor work queues
861 861 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
862 862 * the work queue memory is very important. We used to allocate
863 863 * work queues (the combined receive and send queues) so that they
864 864 * would be aligned on their combined size. That alignment guaranteed
865 865 * that they would never cross the 4GB boundary (Tavor work queues
866 866 * are on the order of MBs at maximum). Now we are able to relax
867 867 * this alignment constraint by ensuring that the IB address assigned
868 868 * to the queue memory (as a result of the tavor_mr_register() call)
869 869 * is offset from zero.
870 870 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
871 871 * guarantee the alignment, but when attempting to use IOMMU bypass
872 872 * mode we found that we were not allowed to specify any alignment
873 873 * that was more restrictive than the system page size.
874 874 * So we avoided this constraint by passing two alignment values,
875 875 * one for the memory allocation itself and the other for the DMA
876 876 * handle (for later bind). This used to cause more memory than
877 877 * necessary to be allocated (in order to guarantee the more
878 878	 * restrictive alignment constraint).  But by guaranteeing the
879 879 * zero-based IB virtual address for the queue, we are able to
880 880 * conserve this memory.
881 881 */
882 882 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
883 883 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
884 884 sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
885 885 rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
886 886 qp->qp_wqinfo.qa_size = sq_size + rq_size;
887 887 qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
888 888 qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size);
889 889 qp->qp_wqinfo.qa_location = wq_location;
890 890 status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
891 891		if (status != DDI_SUCCESS) {
892 892 /* Set "status" and "errormsg" and goto failure */
893 893 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed work queue");
894 894 goto spec_qpalloc_fail5;
895 895 }
896 896 if (sq_wqe_size > rq_wqe_size) {
897 897 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
898 898 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
899 899 } else {
900 900 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
901 901 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
902 902 }
903 903
904 904 /*
905 905 * Register the memory for the special QP work queues. The memory for
906 906 * the special QP must be registered in the Tavor TPT tables. This
907 907 * gives us the LKey to specify in the QP context later. Note: The
908 908 * memory for Tavor work queues (both Send and Recv) must be contiguous
909 909 * and registered as a single memory region. Note also: If the work
910 910 * queue is to be allocated from DDR memory, then only a "bypass"
911 911 * mapping is appropriate.
912 912 * Also, in order to meet the alignment restriction, we pass the
913 913 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
914 914 * This guarantees that the resulting IB vaddr will be zero-based
915 915 * (modulo the offset into the first page).
916 916 * If we fail here, we have a bunch of resource and reference count
917 917 * cleanup to do.
918 918 */
919 919 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
920 920 IBT_MR_NOSLEEP;
921 921 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
922 922 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
923 923 mr_attr.mr_as = NULL;
924 924 mr_attr.mr_flags = flag;
925 925 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
926 926 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
927 927
928 928 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
929 929 if (dma_xfer_mode == DDI_DMA_STREAMING) {
930 930 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
931 931 }
932 932 } else {
933 933 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
934 934 }
935 935 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
936 936 mr_op.mro_bind_override_addr = 1;
937 937 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
938 938 if (status != DDI_SUCCESS) {
939 939 /* Set "status" and "errormsg" and goto failure */
940 940 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
941 941 goto spec_qpalloc_fail6;
942 942 }
943 943
944 944 /*
945 945 * Calculate the offset between the kernel virtual address space
946 946 * and the IB virtual address space. This will be used when
947 947 * posting work requests to properly initialize each WQE.
948 948 */
949 949 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
950 950 (uint64_t)mr->mr_bindinfo.bi_addr;
951 951
952 952 /*
953 953 * Fill in all the return arguments (if necessary). This includes
954 954 * real work queue sizes, real SGLs, and QP number (which will be
955 955 * either zero or one, depending on the special QP type)
956 956 */
957 957 if (queuesz_p != NULL) {
958 958 queuesz_p->cs_sq = (1 << log_qp_sq_size);
959 959 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
960 960 queuesz_p->cs_rq = (1 << log_qp_rq_size);
961 961 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
962 962 }
963 963
964 964 /*
965 965 * Fill in the rest of the Tavor Queue Pair handle. We can update
966 966 * the following fields for use in further operations on the QP.
967 967 */
968 968 qp->qp_qpcrsrcp = qpc;
969 969 qp->qp_rsrcp = rsrc;
970 970 qp->qp_state = TAVOR_QP_RESET;
971 971 qp->qp_pdhdl = pd;
972 972 qp->qp_mrhdl = mr;
973 973 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
974 974 TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
975 975 qp->qp_is_special = (type == IBT_SMI_SQP) ?
976 976 TAVOR_QP_SMI : TAVOR_QP_GSI;
977 977 qp->qp_is_umap = 0;
978 978 qp->qp_uarpg = 0;
979 979 qp->qp_sq_cqhdl = sq_cq;
980 980 qp->qp_sq_lastwqeaddr = NULL;
981 981 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
982 982 qp->qp_sq_buf = sq_buf;
983 983 qp->qp_desc_off = qp_desc_off;
984 984 qp->qp_rq_cqhdl = rq_cq;
985 985 qp->qp_rq_lastwqeaddr = NULL;
986 986 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
987 987 qp->qp_rq_buf = rq_buf;
988 988 qp->qp_portnum = port;
989 989 qp->qp_pkeyindx = 0;
990 990 qp->qp_hdlrarg = (void *)ibt_qphdl;
991 991 qp->qp_mcg_refcnt = 0;
992 992 qp->qp_srq_en = 0;
993 993 qp->qp_srqhdl = NULL;
994 994
995 995 /* Determine if later ddi_dma_sync will be necessary */
996 996 qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);
997 997
998 998 /* All special QPs are UD QP service type */
999 999 qp->qp_serv_type = TAVOR_QP_UD;
1000 1000
1001 1001 /* Zero out the QP context */
1002 1002 bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));
1003 1003
1004 1004 /*
1005 1005 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the
1006 1006 * "qphdl" and return success
1007 1007 */
1008 1008 ASSERT(state->ts_qphdl[qpc->tr_indx + port] == NULL);
1009 1009 state->ts_qphdl[qpc->tr_indx + port] = qp;
1010 1010
1011 1011 *qphdl = qp;
1012 1012
1013 1013 TAVOR_TNF_EXIT(tavor_special_qp_alloc);
1014 1014 return (DDI_SUCCESS);
1015 1015
1016 1016 /*
1017 1017 * The following is cleanup for all possible failure cases in this routine
1018 1018 */
1019 1019 spec_qpalloc_fail6:
1020 1020 tavor_queue_free(state, &qp->qp_wqinfo);
1021 1021 spec_qpalloc_fail5:
1022 1022 tavor_rsrc_free(state, &rsrc);
1023 1023 spec_qpalloc_fail4:
1024 1024 if (tavor_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1025 1025 TAVOR_WARNING(state, "failed to free special QP rsrc");
1026 1026 }
1027 1027 spec_qpalloc_fail3:
1028 1028 tavor_cq_refcnt_dec(rq_cq);
1029 1029 spec_qpalloc_fail2:
1030 1030 tavor_cq_refcnt_dec(sq_cq);
1031 1031 spec_qpalloc_fail1:
1032 1032 tavor_pd_refcnt_dec(pd);
1033 1033 spec_qpalloc_fail:
1034 1034 TNF_PROBE_1(tavor_special_qp_alloc_fail, TAVOR_TNF_ERROR, "",
1035 1035 tnf_string, msg, errormsg);
1036 1036 TAVOR_TNF_EXIT(tavor_special_qp_alloc);
1037 1037 return (status);
1038 1038 }
1039 1039
1040 1040
1041 1041 /*
1042 1042 * tavor_qp_free()
1043 1043 * This function frees up the QP resources. Depending on the value
1044 1044 * of the "free_qp_flags", the QP number may not be released until
1045 1045 * a subsequent call to tavor_qp_release_qpn().
1046 1046 *
1047 1047 * Context: Can be called only from user or kernel context.
1048 1048 */
1049 1049 /* ARGSUSED */
1050 1050 int
1051 1051 tavor_qp_free(tavor_state_t *state, tavor_qphdl_t *qphdl,
1052 1052 ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1053 1053 uint_t sleepflag)
1054 1054 {
1055 1055 tavor_rsrc_t *qpc, *rdb, *rsrc;
1056 1056 tavor_umap_db_entry_t *umapdb;
1057 1057 tavor_qpn_entry_t *entry;
1058 1058 tavor_pdhdl_t pd;
1059 1059 tavor_mrhdl_t mr;
1060 1060 tavor_cqhdl_t sq_cq, rq_cq;
1061 1061 tavor_srqhdl_t srq;
1062 1062 tavor_qphdl_t qp;
1063 1063 uint64_t value;
1064 1064 uint_t type, port;
1065 1065 uint_t maxprot;
1066 1066 uint_t qp_srq_en;
1067 1067 int status;
1068 1068 char *errormsg;
1069 1069
1070 1070 TAVOR_TNF_ENTER(tavor_qp_free);
1071 1071
1072 1072 /*
1073 1073 * Pull all the necessary information from the Tavor Queue Pair
1074 1074 * handle. This is necessary here because the resource for the
1075 1075 * QP handle is going to be freed up as part of this operation.
1076 1076 */
1077 1077 qp = *qphdl;
1078 1078 mutex_enter(&qp->qp_lock);
1079 1079 qpc = qp->qp_qpcrsrcp;
1080 1080 rsrc = qp->qp_rsrcp;
1081 1081 pd = qp->qp_pdhdl;
1082 1082 srq = qp->qp_srqhdl;
1083 1083 mr = qp->qp_mrhdl;
1084 1084 rq_cq = qp->qp_rq_cqhdl;
1085 1085 sq_cq = qp->qp_sq_cqhdl;
1086 1086 rdb = qp->qp_rdbrsrcp;
1087 1087 port = qp->qp_portnum;
1088 1088 qp_srq_en = qp->qp_srq_en;
1089 1089
1090 1090 /*
1091 1091 * If the QP is part of an MCG, then we fail the qp_free
1092 1092 */
1093 1093 if (qp->qp_mcg_refcnt != 0) {
1094 1094 mutex_exit(&qp->qp_lock);
1095 1095 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "QP part of MCG on free");
1096 1096 goto qpfree_fail;
1097 1097 }
1098 1098
1099 1099 /*
1100 1100 * If the QP is not already in "Reset" state, then transition to
1101 1101 * "Reset". This is necessary because software does not reclaim
1102 1102 * ownership of the QP context until the QP is in the "Reset" state.
1103 1103 * If the ownership transfer fails for any reason, then it is an
1104 1104 * indication that something (either in HW or SW) has gone seriously
1105 1105 * wrong. So we print a warning message and return.
1106 1106 */
1107 1107 if (qp->qp_state != TAVOR_QP_RESET) {
1108 1108 if (tavor_qp_to_reset(state, qp) != DDI_SUCCESS) {
1109 1109 mutex_exit(&qp->qp_lock);
1110 1110 TAVOR_WARNING(state, "failed to reset QP context");
1111 1111 /* Set "status" and "errormsg" and goto failure */
1112 1112 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
1113 1113 "reset QP context");
1114 1114 goto qpfree_fail;
1115 1115 }
1116 1116 qp->qp_state = TAVOR_QP_RESET;
1117 1117
1118 1118 /*
1119 1119 * Do any additional handling necessary for the transition
1120 1120 * to the "Reset" state (e.g. update the WRID lists)
1121 1121 */
1122 1122 tavor_wrid_to_reset_handling(state, qp);
1123 1123 }
1124 1124
1125 1125 /*
1126 1126 * If this was a user-mappable QP, then we need to remove its entry
1127 1127 * from the "userland resources database". If it is also currently
1128 1128 * mmap()'d out to a user process, then we need to call
1129 1129 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1130 1130 * We also need to invalidate the QP tracking information for the
1131 1131 * user mapping.
1132 1132 */
1133 1133 if (qp->qp_is_umap) {
1134 1134 status = tavor_umap_db_find(state->ts_instance, qp->qp_qpnum,
1135 1135 MLNX_UMAP_QPMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
1136 1136 &umapdb);
1137 1137 if (status != DDI_SUCCESS) {
1138 1138 mutex_exit(&qp->qp_lock);
1139 1139 TAVOR_WARNING(state, "failed to find in database");
1140 1140 TAVOR_TNF_EXIT(tavor_qp_free);
1141 1141 return (ibc_get_ci_failure(0));
1142 1142 }
1143 1143 tavor_umap_db_free(umapdb);
1144 1144 if (qp->qp_umap_dhp != NULL) {
1145 1145 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1146 1146 status = devmap_devmem_remap(qp->qp_umap_dhp,
1147 1147 state->ts_dip, 0, 0, qp->qp_wqinfo.qa_size,
1148 1148 maxprot, DEVMAP_MAPPING_INVALID, NULL);
1149 1149 if (status != DDI_SUCCESS) {
1150 1150 mutex_exit(&qp->qp_lock);
1151 1151 TAVOR_WARNING(state, "failed in QP memory "
1152 1152 "devmap_devmem_remap()");
1153 1153 TAVOR_TNF_EXIT(tavor_qp_free);
1154 1154 return (ibc_get_ci_failure(0));
1155 1155 }
1156 1156 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1157 1157 }
1158 1158 }
1159 1159
1160 1160 /*
1161 1161 * Put NULL into the Tavor QPNum-to-QPHdl list. This will allow any
1162 1162 * in-progress events to detect that the QP corresponding to this
1163 1163	 * number has been freed.  Note: it does depend on whether we are
1164 1164 * freeing a special QP or not.
1165 1165 */
1166 1166 if (qp->qp_is_special) {
1167 1167 state->ts_qphdl[qpc->tr_indx + port] = NULL;
1168 1168 } else {
1169 1169 state->ts_qphdl[qpc->tr_indx] = NULL;
1170 1170 }
1171 1171
1172 1172 /*
1173 1173 * Drop the QP lock
1174 1174 * At this point the lock is no longer necessary. We cannot
1175 1175 * protect from multiple simultaneous calls to free the same QP.
1176 1176 * In addition, since the QP lock is contained in the QP "software
1177 1177 * handle" resource, which we will free (see below), it is
1178 1178 * important that we have no further references to that memory.
1179 1179 */
1180 1180 mutex_exit(&qp->qp_lock);
1181 1181 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1182 1182
1183 1183 /*
1184 1184 * Free the QP resources
1185 1185 * Start by deregistering and freeing the memory for work queues.
1186 1186 * Next free any previously allocated context information
1187 1187 * (depending on QP type)
1188 1188 * Finally, decrement the necessary reference counts.
1189 1189 * If this fails for any reason, then it is an indication that
1190 1190 * something (either in HW or SW) has gone seriously wrong. So we
1191 1191 * print a warning message and return.
1192 1192 */
1193 1193 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1194 1194 sleepflag);
1195 1195 if (status != DDI_SUCCESS) {
1196 1196 TAVOR_WARNING(state, "failed to deregister QP memory");
1197 1197 /* Set "status" and "errormsg" and goto failure */
1198 1198 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed deregister mr");
1199 1199 goto qpfree_fail;
1200 1200 }
1201 1201
1202 1202 /* Free the memory for the QP */
1203 1203 tavor_queue_free(state, &qp->qp_wqinfo);
1204 1204
1205 1205 /*
1206 1206 * Free up the remainder of the QP resources. Note: we have a few
1207 1207 * different resources to free up depending on whether the QP is a
1208 1208 * special QP or not. As described above, if any of these fail for
1209 1209 * any reason it is an indication that something (either in HW or SW)
1210 1210 * has gone seriously wrong. So we print a warning message and
1211 1211 * return.
1212 1212 */
1213 1213 if (qp->qp_is_special) {
1214 1214 type = (qp->qp_is_special == TAVOR_QP_SMI) ?
1215 1215 IBT_SMI_SQP : IBT_GSI_SQP;
1216 1216
1217 1217 /* Free up resources for the special QP */
1218 1218 status = tavor_special_qp_rsrc_free(state, type, port);
1219 1219 if (status != DDI_SUCCESS) {
1220 1220 TAVOR_WARNING(state, "failed to free special QP rsrc");
1221 1221 /* Set "status" and "errormsg" and goto failure */
1222 1222 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
1223 1223 "failed special QP rsrc");
1224 1224 goto qpfree_fail;
1225 1225 }
1226 1226
1227 1227 } else {
1228 1228 type = qp->qp_serv_type;
1229 1229
1230 1230 /* Free up the RDB entries resource */
1231 1231 if (type == TAVOR_QP_RC) {
1232 1232 tavor_rsrc_free(state, &rdb);
1233 1233 }
1234 1234
1235 1235 /*
1236 1236 * Check the flags and determine whether to release the
1237 1237 * QPN or not, based on their value.
1238 1238 */
1239 1239 if (free_qp_flags == IBC_FREE_QP_ONLY) {
1240 1240 entry = qp->qp_qpn_hdl;
1241 1241 tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
1242 1242 TAVOR_QPN_FREE_ONLY);
1243 1243 *qpnh = (ibc_qpn_hdl_t)entry;
1244 1244 } else {
1245 1245 tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
1246 1246 TAVOR_QPN_RELEASE);
1247 1247 }
1248 1248 }
1249 1249
1250 1250 /* Free the Tavor Queue Pair handle */
1251 1251 tavor_rsrc_free(state, &rsrc);
1252 1252
1253 1253 /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
1254 1254 tavor_cq_refcnt_dec(rq_cq);
1255 1255 tavor_cq_refcnt_dec(sq_cq);
1256 1256 tavor_pd_refcnt_dec(pd);
1257 1257 if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1258 1258 tavor_srq_refcnt_dec(srq);
1259 1259 }
1260 1260
1261 1261 /* Set the qphdl pointer to NULL and return success */
1262 1262 *qphdl = NULL;
1263 1263
1264 1264 TAVOR_TNF_EXIT(tavor_qp_free);
1265 1265 return (DDI_SUCCESS);
1266 1266
1267 1267 qpfree_fail:
1268 1268 TNF_PROBE_1(tavor_qp_free_fail, TAVOR_TNF_ERROR, "",
1269 1269 tnf_string, msg, errormsg);
1270 1270 TAVOR_TNF_EXIT(tavor_qp_free);
1271 1271 return (status);
1272 1272 }
1273 1273
1274 1274
1275 1275 /*
1276 1276 * tavor_qp_query()
1277 1277 * Context: Can be called from interrupt or base context.
1278 1278 */
1279 1279 int
1280 1280 tavor_qp_query(tavor_state_t *state, tavor_qphdl_t qp,
1281 1281 ibt_qp_query_attr_t *attr_p)
1282 1282 {
1283 1283 ibt_cep_state_t qp_state;
1284 1284 ibt_qp_ud_attr_t *ud;
1285 1285 ibt_qp_rc_attr_t *rc;
1286 1286 ibt_qp_uc_attr_t *uc;
1287 1287 ibt_cep_flags_t enable_flags;
1288 1288 tavor_hw_addr_path_t *qpc_path, *qpc_alt_path;
1289 1289 ibt_cep_path_t *path_ptr, *alt_path_ptr;
1290 1290 tavor_hw_qpc_t *qpc;
1291 1291 int status;
1292 1292
1293 1293 TAVOR_TNF_ENTER(tavor_qp_query);
1294 1294
1295 1295 mutex_enter(&qp->qp_lock);
1296 1296
1297 1297 /*
1298 1298 * Grab the temporary QPC entry from QP software state
1299 1299 */
1300 1300 qpc = &qp->qpc;
1301 1301
1302 1302 /* Convert the current Tavor QP state to IBTF QP state */
1303 1303 switch (qp->qp_state) {
1304 1304 case TAVOR_QP_RESET:
1305 1305 qp_state = IBT_STATE_RESET; /* "Reset" */
1306 1306 break;
1307 1307 case TAVOR_QP_INIT:
1308 1308 qp_state = IBT_STATE_INIT; /* Initialized */
1309 1309 break;
1310 1310 case TAVOR_QP_RTR:
1311 1311 qp_state = IBT_STATE_RTR; /* Ready to Receive */
1312 1312 break;
1313 1313 case TAVOR_QP_RTS:
1314 1314 qp_state = IBT_STATE_RTS; /* Ready to Send */
1315 1315 break;
1316 1316 case TAVOR_QP_SQERR:
1317 1317 qp_state = IBT_STATE_SQE; /* Send Queue Error */
1318 1318 break;
1319 1319 case TAVOR_QP_SQD:
1320 1320 if (qp->qp_sqd_still_draining) {
1321 1321 qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */
1322 1322 } else {
1323 1323 qp_state = IBT_STATE_SQD; /* SQ Drained */
1324 1324 }
1325 1325 break;
1326 1326 case TAVOR_QP_ERR:
1327 1327 qp_state = IBT_STATE_ERROR; /* Error */
1328 1328 break;
1329 1329 default:
1330 1330 mutex_exit(&qp->qp_lock);
1331 1331 TNF_PROBE_1(tavor_qp_query_inv_qpstate_fail,
1332 1332 TAVOR_TNF_ERROR, "", tnf_uint, qpstate, qp->qp_state);
1333 1333 TAVOR_TNF_EXIT(tavor_qp_query);
1334 1334 return (ibc_get_ci_failure(0));
1335 1335 }
1336 1336 attr_p->qp_info.qp_state = qp_state;
1337 1337
1338 1338 /* SRQ Hook. */
1339 1339 attr_p->qp_srq = NULL;
1340 1340
1341 1341 /*
1342 1342 * The following QP information is always returned, regardless of
1343 1343 * the current QP state. Note: Some special handling is necessary
1344 1344 * for calculating the QP number on special QP (QP0 and QP1).
1345 1345 */
1346 1346 attr_p->qp_sq_cq = qp->qp_sq_cqhdl->cq_hdlrarg;
1347 1347 attr_p->qp_rq_cq = qp->qp_rq_cqhdl->cq_hdlrarg;
1348 1348 if (qp->qp_is_special) {
1349 1349 attr_p->qp_qpn = (qp->qp_is_special == TAVOR_QP_SMI) ? 0 : 1;
1350 1350 } else {
1351 1351 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
1352 1352 }
1353 1353 attr_p->qp_sq_sgl = qp->qp_sq_sgl;
1354 1354 attr_p->qp_rq_sgl = qp->qp_rq_sgl;
1355 1355 attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz;
1356 1356 attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
1357 1357
1358 1358 /*
1359 1359 * If QP is currently in the "Reset" state, then only the above are
1360 1360 * returned
1361 1361 */
1362 1362 if (qp_state == IBT_STATE_RESET) {
1363 1363 mutex_exit(&qp->qp_lock);
1364 1364 TAVOR_TNF_EXIT(tavor_qp_query);
1365 1365 return (DDI_SUCCESS);
1366 1366 }
1367 1367
1368 1368 /*
1369 1369 * Post QUERY_QP command to firmware
1370 1370 *
1371 1371 * We do a TAVOR_NOSLEEP here because we are holding the "qp_lock".
1372 1372 * Since we may be in the interrupt context (or subsequently raised
1373 1373 * to interrupt level by priority inversion), we do not want to block
1374 1374 * in this routine waiting for success.
1375 1375 */
1376 1376 status = tavor_cmn_query_cmd_post(state, QUERY_QP, qp->qp_qpnum,
1377 1377 qpc, sizeof (tavor_hw_qpc_t), TAVOR_CMD_NOSLEEP_SPIN);
1378 1378 if (status != TAVOR_CMD_SUCCESS) {
1379 1379 mutex_exit(&qp->qp_lock);
1380 1380 cmn_err(CE_CONT, "Tavor: QUERY_QP command failed: %08x\n",
1381 1381 status);
1382 1382 TNF_PROBE_1(tavor_qp_query_cmd_fail, TAVOR_TNF_ERROR, "",
1383 1383 tnf_uint, status, status);
1384 1384 TAVOR_TNF_EXIT(tavor_qp_query);
1385 1385 return (ibc_get_ci_failure(0));
1386 1386 }
1387 1387
1388 1388 /*
1389 1389 * Fill in the additional QP info based on the QP's transport type.
1390 1390 */
1391 1391 if (qp->qp_serv_type == TAVOR_QP_UD) {
1392 1392
1393 1393 /* Fill in the UD-specific info */
1394 1394 ud = &attr_p->qp_info.qp_transport.ud;
1395 1395 ud->ud_qkey = (ib_qkey_t)qpc->qkey;
1396 1396 ud->ud_sq_psn = qpc->next_snd_psn;
1397 1397 ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx;
1398 1398 ud->ud_port = qpc->pri_addr_path.portnum;
1399 1399
1400 1400 attr_p->qp_info.qp_trans = IBT_UD_SRV;
1401 1401
1402 1402 } else if (qp->qp_serv_type == TAVOR_QP_RC) {
1403 1403
1404 1404 /* Fill in the RC-specific info */
1405 1405 rc = &attr_p->qp_info.qp_transport.rc;
1406 1406 rc->rc_sq_psn = qpc->next_snd_psn;
1407 1407 rc->rc_rq_psn = qpc->next_rcv_psn;
1408 1408 rc->rc_dst_qpn = qpc->rem_qpn;
1409 1409
1410 1410 /* Grab the path migration state information */
1411 1411 if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
1412 1412 rc->rc_mig_state = IBT_STATE_MIGRATED;
1413 1413 } else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
1414 1414 rc->rc_mig_state = IBT_STATE_REARMED;
1415 1415 } else {
1416 1416 rc->rc_mig_state = IBT_STATE_ARMED;
1417 1417 }
1418 1418 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
1419 1419 rc->rc_rdma_ra_in = (1 << qpc->rra_max);
1420 1420 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
1421 1421 rc->rc_path_mtu = qpc->mtu;
1422 1422 rc->rc_retry_cnt = qpc->retry_cnt;
1423 1423
1424 1424 /* Get the common primary address path fields */
1425 1425 qpc_path = &qpc->pri_addr_path;
1426 1426 path_ptr = &rc->rc_path;
1427 1427 tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
1428 1428 TAVOR_ADDRPATH_QP, qp);
1429 1429
1430 1430 /* Fill in the additional primary address path fields */
1431 1431 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
1432 1432 path_ptr->cep_hca_port_num = qpc_path->portnum;
1433 1433 path_ptr->cep_timeout = qpc_path->ack_timeout;
1434 1434
1435 1435 /* Get the common alternate address path fields */
1436 1436 qpc_alt_path = &qpc->alt_addr_path;
1437 1437 alt_path_ptr = &rc->rc_alt_path;
1438 1438 tavor_get_addr_path(state, qpc_alt_path,
1439 1439 &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);
1440 1440
1441 1441 /* Fill in the additional alternate address path fields */
1442 1442 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
1443 1443 alt_path_ptr->cep_hca_port_num = qpc_alt_path->portnum;
1444 1444 alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout;
1445 1445
1446 1446 /* Get the RNR retry time from primary path */
1447 1447 rc->rc_rnr_retry_cnt = qpc_path->rnr_retry;
1448 1448
1449 1449 /* Set the enable flags based on RDMA/Atomic enable bits */
1450 1450 enable_flags = IBT_CEP_NO_FLAGS;
1451 1451 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
1452 1452 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
1453 1453 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
1454 1454 attr_p->qp_info.qp_flags = enable_flags;
1455 1455
1456 1456 attr_p->qp_info.qp_trans = IBT_RC_SRV;
1457 1457
1458 1458 } else if (qp->qp_serv_type == TAVOR_QP_UC) {
1459 1459
1460 1460 /* Fill in the UC-specific info */
1461 1461 uc = &attr_p->qp_info.qp_transport.uc;
1462 1462 uc->uc_sq_psn = qpc->next_snd_psn;
1463 1463 uc->uc_rq_psn = qpc->next_rcv_psn;
1464 1464 uc->uc_dst_qpn = qpc->rem_qpn;
1465 1465
1466 1466 /* Grab the path migration state information */
1467 1467 if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
1468 1468 uc->uc_mig_state = IBT_STATE_MIGRATED;
1469 1469 } else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
1470 1470 uc->uc_mig_state = IBT_STATE_REARMED;
1471 1471 } else {
1472 1472 uc->uc_mig_state = IBT_STATE_ARMED;
1473 1473 }
1474 1474 uc->uc_path_mtu = qpc->mtu;
1475 1475
1476 1476 /* Get the common primary address path fields */
1477 1477 qpc_path = &qpc->pri_addr_path;
1478 1478 path_ptr = &uc->uc_path;
1479 1479 tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
1480 1480 TAVOR_ADDRPATH_QP, qp);
1481 1481
1482 1482 /* Fill in the additional primary address path fields */
1483 1483 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
1484 1484 path_ptr->cep_hca_port_num = qpc_path->portnum;
1485 1485
1486 1486 /* Get the common alternate address path fields */
1487 1487 qpc_alt_path = &qpc->alt_addr_path;
1488 1488 alt_path_ptr = &uc->uc_alt_path;
1489 1489 tavor_get_addr_path(state, qpc_alt_path,
1490 1490 &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);
1491 1491
1492 1492 /* Fill in the additional alternate address path fields */
1493 1493 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
1494 1494 alt_path_ptr->cep_hca_port_num = qpc_alt_path->portnum;
1495 1495
1496 1496 /*
1497 1497 * Set the enable flags based on RDMA enable bits (by
1498 1498 * definition UC doesn't support Atomic or RDMA Read)
1499 1499 */
1500 1500 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
1501 1501 attr_p->qp_info.qp_flags = enable_flags;
1502 1502
1503 1503 attr_p->qp_info.qp_trans = IBT_UC_SRV;
1504 1504
1505 1505 } else {
1506 1506 TAVOR_WARNING(state, "unexpected QP transport type");
1507 1507 mutex_exit(&qp->qp_lock);
1508 1508 return (ibc_get_ci_failure(0));
1509 1509 }
1510 1510
1511 1511 /*
1512 1512 * Under certain circumstances it is possible for the Tavor hardware
1513 1513 * to transition to one of the error states without software directly
1514 1514 * knowing about it. The QueryQP() call is the one place where we
1515 1515 * have an opportunity to sample and update our view of the QP state.
1516 1516 */
1517 1517 if (qpc->state == TAVOR_QP_SQERR) {
1518 1518 attr_p->qp_info.qp_state = IBT_STATE_SQE;
1519 1519 qp->qp_state = TAVOR_QP_SQERR;
1520 1520 }
1521 1521 if (qpc->state == TAVOR_QP_ERR) {
1522 1522 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
1523 1523 qp->qp_state = TAVOR_QP_ERR;
1524 1524 }
1525 1525 mutex_exit(&qp->qp_lock);
1526 1526
1527 1527 TAVOR_TNF_EXIT(tavor_qp_query);
1528 1528 return (DDI_SUCCESS);
1529 1529 }
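
The RC depths reported above come out of the QPC as log2 values (sra_max,
rra_max) and are expanded with a shift.  A minimal worked sketch of that
expansion; the field values below are assumed for illustration only, not
taken from real hardware state:

    /* Illustration only -- the sra_max/rra_max values are assumed. */
    uint32_t sra_max = 2, rra_max = 3;
    uint32_t ra_out = (1 << sra_max);   /* 4 outstanding RDMA/atomic as initiator */
    uint32_t ra_in  = (1 << rra_max);   /* 8 outstanding RDMA/atomic as responder */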
1530 1530
1531 1531
1532 1532 /*
1533 1533 * tavor_qp_create_qpn()
1534 1534 * Context: Can be called from interrupt or base context.
1535 1535 */
1536 1536 static int
1537 1537 tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp, tavor_rsrc_t *qpc)
1538 1538 {
1539 1539 tavor_qpn_entry_t query;
1540 1540 tavor_qpn_entry_t *entry;
1541 1541 avl_index_t where;
1542 1542
1543 1543 TAVOR_TNF_ENTER(tavor_qp_create_qpn);
1544 1544
1545 1545 /*
1546 1546 * Build a query (for the AVL tree lookup) and attempt to find
1547 1547 * a previously added entry that has a matching QPC index. If
1548 1548 * no matching entry is found, then allocate, initialize, and
1549 1549 * add an entry to the AVL tree.
1550 1550 * If a matching entry is found, then increment its QPN counter
1551 1551 * and reference counter.
1552 1552 */
1553 1553 query.qpn_indx = qpc->tr_indx;
1554 1554 mutex_enter(&state->ts_qpn_avl_lock);
1555 1555 entry = (tavor_qpn_entry_t *)avl_find(&state->ts_qpn_avl,
1556 1556 &query, &where);
1557 1557 if (entry == NULL) {
1558 1558 /*
1559 1559 * Allocate and initialize a QPN entry, then insert
1560 1560 * it into the AVL tree.
1561 1561 */
1562 1562 entry = (tavor_qpn_entry_t *)kmem_zalloc(
1563 1563 sizeof (tavor_qpn_entry_t), KM_NOSLEEP);
1564 1564 if (entry == NULL) {
1565 1565 mutex_exit(&state->ts_qpn_avl_lock);
1566 1566 TAVOR_TNF_EXIT(tavor_qp_create_qpn);
1567 1567 return (DDI_FAILURE);
1568 1568 }
1569 1569 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1570 1570
1571 1571 entry->qpn_indx = qpc->tr_indx;
1572 1572 entry->qpn_refcnt = 0;
1573 1573 entry->qpn_counter = 0;
1574 1574
1575 1575 avl_insert(&state->ts_qpn_avl, entry, where);
1576 1576 }
1577 1577
1578 1578 /*
1579 1579 * Make the AVL tree entry point to the QP context resource that
1580 1580 * it will be responsible for tracking
1581 1581 */
1582 1582 entry->qpn_qpc = qpc;
1583 1583
1584 1584 /*
1585 1585 * Setup the QP handle to point to the AVL tree entry. Then
1586 1586 * generate the new QP number from the entry's QPN counter value
1587 1587 * and the hardware's QP context table index.
1588 1588 */
1589 1589 qp->qp_qpn_hdl = entry;
1590 1590 qp->qp_qpnum = ((entry->qpn_counter <<
1591 1591 state->ts_cfg_profile->cp_log_num_qp) | qpc->tr_indx) &
1592 1592 TAVOR_QP_MAXNUMBER_MSK;
1593 1593
1594 1594 /*
1595 1595 * Increment the reference counter and QPN counter. The QPN
1596 1596 * counter always indicates the next available number for use.
1597 1597 */
1598 1598 entry->qpn_counter++;
1599 1599 entry->qpn_refcnt++;
1600 1600
1601 1601 mutex_exit(&state->ts_qpn_avl_lock);
1602 1602 TAVOR_TNF_EXIT(tavor_qp_create_qpn);
1603 1603 return (DDI_SUCCESS);
1604 1604 }
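
A minimal sketch of the QP number composition performed above.  The 24-bit
mask value and the cp_log_num_qp value are assumptions made for the example,
not the driver's actual configuration:

    /* Illustration only -- log_num_qp and tr_indx are assumed values. */
    uint32_t log_num_qp = 16;             /* stand-in for cp_log_num_qp */
    uint32_t tr_indx    = 0x0123;         /* stand-in QPC table index */
    uint32_t counter    = 2;              /* the entry's qpn_counter */
    uint32_t qpnum = ((counter << log_num_qp) | tr_indx) & 0xFFFFFF;
    /* qpnum == 0x020123; the low log_num_qp bits still name the QPC slot */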
1605 1605
1606 1606
1607 1607 /*
1608 1608 * tavor_qp_release_qpn()
1609 1609 * Context: Can be called only from user or kernel context.
1610 1610 */
1611 1611 void
1612 1612 tavor_qp_release_qpn(tavor_state_t *state, tavor_qpn_entry_t *entry, int flags)
1613 1613 {
1614 1614 TAVOR_TNF_ENTER(tavor_qp_release_qpn);
1615 1615
1616 1616 ASSERT(entry != NULL);
1617 1617
1618 1618 mutex_enter(&state->ts_qpn_avl_lock);
1619 1619
1620 1620 /*
1621 1621 * If we are releasing the QP number here, then we decrement the
1622 1622 * reference count and check for zero references. If there are
1623 1623 * zero references, then we free the QPC context (if it hadn't
1624 1624 * already been freed during a TAVOR_QPN_FREE_ONLY free, i.e. for
1625 1625 * reuse with another similar QP number) and remove the tracking
1626 1626 * structure from the QP number AVL tree and free the structure.
1627 1627 * If we are not releasing the QP number here, then, as long as we
1628 1628 * have not exhausted the usefulness of the QPC context (that is,
1629 1629 * re-used it too many times without the reference count having
1630 1630 * gone to zero), we free up the QPC context for use by another
1631 1631 * thread (which will use it to construct a different QP number
1632 1632 * from the same QPC table index).
1633 1633 */
1634 1634 if (flags == TAVOR_QPN_RELEASE) {
1635 1635 entry->qpn_refcnt--;
1636 1636
1637 1637 /*
1638 1638 * If the reference count is zero, then we free the QPC
1639 1639 		 * context (if it hadn't already been freed in an earlier
1640 1640 * step, e.g. TAVOR_QPN_FREE_ONLY) and remove/free the
1641 1641 * tracking structure from the QP number AVL tree.
1642 1642 */
1643 1643 if (entry->qpn_refcnt == 0) {
1644 1644 if (entry->qpn_qpc != NULL) {
1645 1645 tavor_rsrc_free(state, &entry->qpn_qpc);
1646 1646 }
1647 1647
1648 1648 /*
1649 1649 			 * If the current entry has served its useful
1650 1650 * purpose (i.e. been reused the maximum allowable
1651 1651 * number of times), then remove it from QP number
1652 1652 * AVL tree and free it up.
1653 1653 */
1654 1654 if (entry->qpn_counter >= (1 <<
1655 1655 (24 - state->ts_cfg_profile->cp_log_num_qp))) {
1656 1656 avl_remove(&state->ts_qpn_avl, entry);
1657 1657 kmem_free(entry, sizeof (tavor_qpn_entry_t));
1658 1658 }
1659 1659 }
1660 1660
1661 1661 } else if (flags == TAVOR_QPN_FREE_ONLY) {
1662 1662 /*
1663 1663 * Even if we are not freeing the QP number, that will not
1664 1664 * always prevent us from releasing the QPC context. In fact,
1665 1665 * since the QPC context only forms part of the whole QPN,
1666 1666 * we want to free it up for use by other consumers. But
1667 1667 * if the reference count is non-zero (which it will always
1668 1668 * be when we are doing TAVOR_QPN_FREE_ONLY) and the counter
1669 1669 * has reached its maximum value, then we cannot reuse the
1670 1670 * QPC context until the reference count eventually reaches
1671 1671 * zero (in TAVOR_QPN_RELEASE, above).
1672 1672 */
1673 1673 if (entry->qpn_counter < (1 <<
1674 1674 (24 - state->ts_cfg_profile->cp_log_num_qp))) {
1675 1675 tavor_rsrc_free(state, &entry->qpn_qpc);
1676 1676 }
1677 1677 }
1678 1678 mutex_exit(&state->ts_qpn_avl_lock);
1679 1679
1680 1680 TAVOR_TNF_EXIT(tavor_qp_release_qpn);
1681 1681 }
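
The reuse test above bounds how many distinct QP numbers a single QPC slot
can produce before its tracking entry must drain.  A short illustration,
assuming cp_log_num_qp is 16 (an assumption for the example only):

    /* Illustration only -- cp_log_num_qp = 16 is assumed. */
    uint_t max_reuse = (1 << (24 - 16));      /* 256 QPNs per QPC slot */

Once qpn_counter reaches that value, the QPC resource is held until the
entry's reference count finally reaches zero in the TAVOR_QPN_RELEASE path.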
1682 1682
1683 1683
1684 1684 /*
1685 1685  * tavor_qpn_avl_compare()
1686 1686 * Context: Can be called from user or kernel context.
1687 1687 */
1688 1688 static int
1689 1689 tavor_qpn_avl_compare(const void *q, const void *e)
1690 1690 {
1691 1691 tavor_qpn_entry_t *entry, *query;
1692 1692
1693 1693 TAVOR_TNF_ENTER(tavor_qpn_avl_compare);
1694 1694
1695 1695 entry = (tavor_qpn_entry_t *)e;
1696 1696 query = (tavor_qpn_entry_t *)q;
1697 1697
1698 1698 if (query->qpn_indx < entry->qpn_indx) {
1699 1699 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1700 1700 return (-1);
1701 1701 } else if (query->qpn_indx > entry->qpn_indx) {
1702 1702 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1703 1703 return (+1);
1704 1704 } else {
1705 1705 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1706 1706 return (0);
1707 1707 }
1708 1708 }
1709 1709
1710 1710
1711 1711 /*
1712 1712 * tavor_qpn_avl_init()
1713 1713 * Context: Only called from attach() path context
1714 1714 */
1715 1715 void
1716 1716 tavor_qpn_avl_init(tavor_state_t *state)
1717 1717 {
1718 1718 TAVOR_TNF_ENTER(tavor_qpn_avl_init);
1719 1719
1720 1720 /* Initialize the lock used for QP number (QPN) AVL tree access */
1721 1721 mutex_init(&state->ts_qpn_avl_lock, NULL, MUTEX_DRIVER,
1722 1722 DDI_INTR_PRI(state->ts_intrmsi_pri));
1723 1723
1724 1724 /* Initialize the AVL tree for the QP number (QPN) storage */
1725 1725 avl_create(&state->ts_qpn_avl, tavor_qpn_avl_compare,
1726 1726 sizeof (tavor_qpn_entry_t),
1727 1727 offsetof(tavor_qpn_entry_t, qpn_avlnode));
1728 1728
1729 1729 TAVOR_TNF_EXIT(tavor_qpn_avl_init);
1730 1730 }
1731 1731
1732 1732
1733 1733 /*
1734 1734 * tavor_qpn_avl_fini()
1735 1735 * Context: Only called from attach() and/or detach() path contexts
1736 1736 */
1737 1737 void
1738 1738 tavor_qpn_avl_fini(tavor_state_t *state)
1739 1739 {
1740 1740 tavor_qpn_entry_t *entry;
1741 1741 void *cookie;
1742 1742
1743 1743 TAVOR_TNF_ENTER(tavor_qpn_avl_fini);
1744 1744
1745 1745 /*
1746 1746 * Empty all entries (if necessary) and destroy the AVL tree
1747 1747 * that was used for QP number (QPN) tracking.
1748 1748 */
1749 1749 cookie = NULL;
1750 1750 while ((entry = (tavor_qpn_entry_t *)avl_destroy_nodes(
1751 1751 &state->ts_qpn_avl, &cookie)) != NULL) {
1752 1752 kmem_free(entry, sizeof (tavor_qpn_entry_t));
1753 1753 }
1754 1754 avl_destroy(&state->ts_qpn_avl);
1755 1755
1756 1756 /* Destroy the lock used for QP number (QPN) AVL tree access */
1757 1757 mutex_destroy(&state->ts_qpn_avl_lock);
1758 1758
1759 1759 TAVOR_TNF_EXIT(tavor_qpn_avl_fini);
1760 1760 }
1761 1761
1762 1762
1763 1763 /*
1764 1764 * tavor_qphdl_from_qpnum()
1765 1765 * Context: Can be called from interrupt or base context.
1766 1766 *
1767 1767 * This routine is important because changing the unconstrained
1768 1768 * portion of the QP number is critical to the detection of a
1769 1769 * potential race condition in the QP event handler code (i.e. the case
1770 1770 * where a QP is freed and alloc'd again before an event for the
1771 1771 * "old" QP can be handled).
1772 1772 *
1773 1773 * While this is not a perfect solution (not sure that one exists)
1774 1774 * it does help to mitigate the chance that this race condition will
1775 1775 * cause us to deliver a "stale" event to the new QP owner. Note:
1776 1776 * this solution does not scale well because the number of constrained
1777 1777 * bits increases (and, hence, the number of unconstrained bits
1778 1778 * decreases) as the number of supported QPs grows. For small and
1779 1779 * intermediate values, it should hopefully provide sufficient
1780 1780 * protection.
1781 1781 */
1782 1782 tavor_qphdl_t
1783 1783 tavor_qphdl_from_qpnum(tavor_state_t *state, uint_t qpnum)
1784 1784 {
1785 1785 uint_t qpindx, qpmask;
1786 1786
1787 1787 /* Calculate the QP table index from the qpnum */
1788 1788 qpmask = (1 << state->ts_cfg_profile->cp_log_num_qp) - 1;
1789 1789 qpindx = qpnum & qpmask;
1790 1790 return (state->ts_qphdl[qpindx]);
1791 1791 }
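
A sketch of the reverse mapping, reusing the assumed values from the
composition example after tavor_qp_create_qpn().  As the block comment above
notes, the unconstrained upper bits are what allow a stale event for a
freed-and-reallocated QP to be recognized:

    /* Illustration only -- cp_log_num_qp = 16 is assumed. */
    uint_t qpmask = (1 << 16) - 1;
    uint_t qpindx = 0x020123 & qpmask;        /* 0x0123: same QPC slot */
    /*
     * A later allocation from the same slot might produce 0x030123;
     * the full 24-bit number differs even though qpindx is identical.
     */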
1792 1792
1793 1793
1794 1794 /*
1795 1795 * tavor_special_qp_rsrc_alloc
1796 1796 * Context: Can be called from interrupt or base context.
1797 1797 */
1798 1798 static int
1799 1799 tavor_special_qp_rsrc_alloc(tavor_state_t *state, ibt_sqp_type_t type,
1800 1800 uint_t port, tavor_rsrc_t **qp_rsrc)
1801 1801 {
1802 1802 uint_t mask, flags;
1803 1803 int status;
1804 1804
1805 1805 TAVOR_TNF_ENTER(tavor_special_qp_rsrc_alloc);
1806 1806
1807 1807 mutex_enter(&state->ts_spec_qplock);
1808 1808 flags = state->ts_spec_qpflags;
1809 1809 if (type == IBT_SMI_SQP) {
1810 1810 /*
1811 1811 * Check here to see if the driver has been configured
1812 1812 * to instruct the Tavor firmware to handle all incoming
1813 1813 * SMP messages (i.e. messages sent to SMA). If so,
1814 1814 * then we will treat QP0 as if it has already been
1815 1815 		 * allocated (for internal use), because allowing the
1816 1816 		 * allocation to proceed would cause unexpected
1817 1817 		 * behavior (e.g. the Tavor SMA becomes unresponsive).
1818 1818 */
1819 1819 if (state->ts_cfg_profile->cp_qp0_agents_in_fw != 0) {
1820 1820 mutex_exit(&state->ts_spec_qplock);
1821 1821 TNF_PROBE_0(tavor_special_qp0_alloc_already_in_fw,
1822 1822 TAVOR_TNF_ERROR, "");
1823 1823 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
1824 1824 return (IBT_QP_IN_USE);
1825 1825 }
1826 1826
1827 1827 /*
1828 1828 * If this is the first QP0 allocation, then post
1829 1829 * a CONF_SPECIAL_QP firmware command
1830 1830 */
1831 1831 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
1832 1832 status = tavor_conf_special_qp_cmd_post(state,
1833 1833 state->ts_spec_qp0->tr_indx, TAVOR_CMD_QP_SMI,
1834 1834 TAVOR_CMD_NOSLEEP_SPIN);
1835 1835 if (status != TAVOR_CMD_SUCCESS) {
1836 1836 mutex_exit(&state->ts_spec_qplock);
1837 1837 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1838 1838 "command failed: %08x\n", status);
1839 1839 TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
1840 1840 TAVOR_TNF_ERROR, "", tnf_uint, status,
1841 1841 status);
1842 1842 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
1843 1843 return (IBT_INSUFF_RESOURCE);
1844 1844 }
1845 1845 }
1846 1846
1847 1847 /*
1848 1848 * Now check (and, if necessary, modify) the flags to indicate
1849 1849 * whether the allocation was successful
1850 1850 */
1851 1851 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
1852 1852 if (flags & mask) {
1853 1853 mutex_exit(&state->ts_spec_qplock);
1854 1854 TNF_PROBE_1(tavor_ts_spec_qp0_alloc_already,
1855 1855 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
1856 1856 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
1857 1857 return (IBT_QP_IN_USE);
1858 1858 }
1859 1859 state->ts_spec_qpflags |= mask;
1860 1860 *qp_rsrc = state->ts_spec_qp0;
1861 1861
1862 1862 } else {
1863 1863 /*
1864 1864 * If this is the first QP1 allocation, then post
1865 1865 * a CONF_SPECIAL_QP firmware command
1866 1866 */
1867 1867 if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
1868 1868 status = tavor_conf_special_qp_cmd_post(state,
1869 1869 state->ts_spec_qp1->tr_indx, TAVOR_CMD_QP_GSI,
1870 1870 TAVOR_CMD_NOSLEEP_SPIN);
1871 1871 if (status != TAVOR_CMD_SUCCESS) {
1872 1872 mutex_exit(&state->ts_spec_qplock);
1873 1873 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1874 1874 "command failed: %08x\n", status);
1875 1875 TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
1876 1876 TAVOR_TNF_ERROR, "", tnf_uint, status,
1877 1877 status);
1878 1878 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
1879 1879 return (IBT_INSUFF_RESOURCE);
1880 1880 }
1881 1881 }
1882 1882
1883 1883 /*
1884 1884 * Now check (and, if necessary, modify) the flags to indicate
1885 1885 * whether the allocation was successful
1886 1886 */
1887 1887 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
1888 1888 if (flags & mask) {
1889 1889 mutex_exit(&state->ts_spec_qplock);
1890 1890 TNF_PROBE_0(tavor_ts_spec_qp1_alloc_already,
1891 1891 TAVOR_TNF_ERROR, "");
1892 1892 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
1893 1893 return (IBT_QP_IN_USE);
1894 1894 }
1895 1895 state->ts_spec_qpflags |= mask;
1896 1896 *qp_rsrc = state->ts_spec_qp1;
1897 1897 }
1898 1898
1899 1899 mutex_exit(&state->ts_spec_qplock);
1900 1900 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
1901 1901 return (DDI_SUCCESS);
1902 1902 }
1903 1903
1904 1904
1905 1905 /*
1906 1906 * tavor_special_qp_rsrc_free
1907 1907 * Context: Can be called from interrupt or base context.
1908 1908 */
1909 1909 static int
1910 1910 tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
1911 1911 uint_t port)
1912 1912 {
1913 1913 uint_t mask, flags;
1914 1914 int status;
1915 1915
1916 1916 TAVOR_TNF_ENTER(tavor_special_qp_rsrc_free);
1917 1917
1918 1918 mutex_enter(&state->ts_spec_qplock);
1919 1919 if (type == IBT_SMI_SQP) {
1920 1920 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
1921 1921 state->ts_spec_qpflags &= ~mask;
1922 1922 flags = state->ts_spec_qpflags;
1923 1923
1924 1924 /*
1925 1925 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
1926 1926 * firmware command
1927 1927 */
1928 1928 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
1929 1929 status = tavor_conf_special_qp_cmd_post(state, 0,
1930 1930 TAVOR_CMD_QP_SMI, TAVOR_CMD_NOSLEEP_SPIN);
1931 1931 if (status != TAVOR_CMD_SUCCESS) {
1932 1932 mutex_exit(&state->ts_spec_qplock);
1933 1933 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1934 1934 "command failed: %08x\n", status);
1935 1935 TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
1936 1936 TAVOR_TNF_ERROR, "", tnf_uint, status,
1937 1937 status);
1938 1938 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
1939 1939 return (ibc_get_ci_failure(0));
1940 1940 }
1941 1941 }
1942 1942 } else {
1943 1943 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
1944 1944 state->ts_spec_qpflags &= ~mask;
1945 1945 flags = state->ts_spec_qpflags;
1946 1946
1947 1947 /*
1948 1948 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
1949 1949 * firmware command
1950 1950 */
1951 1951 if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
1952 1952 status = tavor_conf_special_qp_cmd_post(state, 0,
1953 1953 TAVOR_CMD_QP_GSI, TAVOR_CMD_NOSLEEP_SPIN);
1954 1954 if (status != TAVOR_CMD_SUCCESS) {
1955 1955 mutex_exit(&state->ts_spec_qplock);
1956 1956 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1957 1957 "command failed: %08x\n", status);
1958 1958 TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
1959 1959 TAVOR_TNF_ERROR, "", tnf_uint, status,
1960 1960 status);
1961 1961 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
1962 1962 return (ibc_get_ci_failure(0));
1963 1963 }
1964 1964 }
1965 1965 }
1966 1966
1967 1967 mutex_exit(&state->ts_spec_qplock);
1968 1968 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
1969 1969 return (DDI_SUCCESS);
1970 1970 }
1971 1971
1972 1972
1973 1973 /*
1974 1974 * tavor_qp_sgl_to_logwqesz()
1975 1975 * Context: Can be called from interrupt or base context.
1976 1976 */
1977 1977 static void
1978 1978 tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
1979 1979 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
1980 1980 {
1981 1981 uint_t max_size, log2, actual_sgl;
1982 1982
1983 1983 TAVOR_TNF_ENTER(tavor_qp_sgl_to_logwqesz);
1984 1984
1985 1985 switch (wq_type) {
1986 1986 case TAVOR_QP_WQ_TYPE_SENDQ:
1987 1987 /*
1988 1988 * Use requested maximum SGL to calculate max descriptor size
1989 1989 * (while guaranteeing that the descriptor size is a
1990 1990 * power-of-2 cachelines).
1991 1991 */
1992 1992 max_size = (TAVOR_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
1993 1993 log2 = highbit(max_size);
1994 - if ((max_size & (max_size - 1)) == 0) {
1994 + if (ISP2(max_size)) {
1995 1995 log2 = log2 - 1;
1996 1996 }
1997 1997
1998 1998 /* Make sure descriptor is at least the minimum size */
1999 1999 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2000 2000
2001 2001 /* Calculate actual number of SGL (given WQE size) */
2002 2002 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_SND_HDRS) >> 4;
2003 2003 break;
2004 2004
2005 2005 case TAVOR_QP_WQ_TYPE_RECVQ:
2006 2006 /*
2007 2007 * Same as above (except for Recv WQEs)
2008 2008 */
2009 2009 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2010 2010 log2 = highbit(max_size);
2011 - if ((max_size & (max_size - 1)) == 0) {
2011 + if (ISP2(max_size)) {
2012 2012 log2 = log2 - 1;
2013 2013 }
2014 2014
2015 2015 /* Make sure descriptor is at least the minimum size */
2016 2016 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2017 2017
2018 2018 /* Calculate actual number of SGL (given WQE size) */
2019 2019 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
2020 2020 break;
2021 2021
2022 2022 case TAVOR_QP_WQ_TYPE_SENDMLX_QP0:
2023 2023 /*
2024 2024 * Same as above (except for MLX transport WQEs). For these
2025 2025 * WQEs we have to account for the space consumed by the
2026 2026 * "inline" packet headers. (This is smaller than for QP1
2027 2027 * below because QP0 is not allowed to send packets with a GRH.
2028 2028 */
2029 2029 max_size = (TAVOR_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2030 2030 log2 = highbit(max_size);
2031 - if ((max_size & (max_size - 1)) == 0) {
2031 + if (ISP2(max_size)) {
2032 2032 log2 = log2 - 1;
2033 2033 }
2034 2034
2035 2035 /* Make sure descriptor is at least the minimum size */
2036 2036 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2037 2037
2038 2038 /* Calculate actual number of SGL (given WQE size) */
2039 2039 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP0_HDRS) >> 4;
2040 2040 break;
2041 2041
2042 2042 case TAVOR_QP_WQ_TYPE_SENDMLX_QP1:
2043 2043 /*
2044 2044 * Same as above. For these WQEs we again have to account for
2045 2045 * the space consumed by the "inline" packet headers. (This
2046 2046 * is larger than for QP0 above because we have to account for
2047 2047 * the possibility of a GRH in each packet - and this
2048 2048 * introduces an alignment issue that causes us to consume
2049 2049 * an additional 8 bytes).
2050 2050 */
2051 2051 max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2052 2052 log2 = highbit(max_size);
2053 - if ((max_size & (max_size - 1)) == 0) {
2053 + if (ISP2(max_size)) {
2054 2054 log2 = log2 - 1;
2055 2055 }
2056 2056
2057 2057 /* Make sure descriptor is at least the minimum size */
2058 2058 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2059 2059
2060 2060 /* Calculate actual number of SGL (given WQE size) */
2061 2061 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
2062 2062 break;
2063 2063
2064 2064 default:
2065 2065 TAVOR_WARNING(state, "unexpected work queue type");
2066 2066 TNF_PROBE_0(tavor_qp_sgl_to_logwqesz_inv_wqtype_fail,
2067 2067 TAVOR_TNF_ERROR, "");
2068 2068 break;
2069 2069 }
2070 2070
2071 2071 /* Fill in the return values */
2072 2072 *logwqesz = log2;
2073 2073 *max_sgl = min(state->ts_cfg_profile->cp_wqe_real_max_sgl, actual_sgl);
2074 2074
2075 2075 TAVOR_TNF_EXIT(tavor_qp_sgl_to_logwqesz);
2076 2076 }
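
ISP2() is the power-of-two test from sys/sysmacros.h that this change
substitutes for the open-coded (x & (x - 1)) == 0 comparisons.  Together
with highbit(), which returns the 1-based position of the most significant
set bit, it implements a ceiling log2.  A minimal sketch of the idiom,
separate from the driver code:

    #include <sys/sysmacros.h>      /* ISP2() */
    /* highbit() comes from kernel headers this file already pulls in */

    /* Ceiling log2 for x > 0, as used for the WQE sizes above. */
    static uint_t
    ceil_log2(uint_t x)
    {
            uint_t log2 = highbit(x);       /* 1-based index of the MSB */

            if (ISP2(x))
                    log2--;                 /* exact power of two */
            return (log2);
    }

    /* e.g. ceil_log2(96) == 7 (128-byte WQE); ceil_log2(64) == 6 */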