1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * tavor_qp.c
  29  *    Tavor Queue Pair Processing Routines
  30  *
  31  *    Implements all the routines necessary for allocating, freeing, and
  32  *    querying the Tavor queue pairs.
  33  */
  34 
  35 #include <sys/types.h>
  36 #include <sys/conf.h>
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/modctl.h>
  40 #include <sys/bitmap.h>
  41 #include <sys/sysmacros.h>
  42 
  43 #include <sys/ib/adapters/tavor/tavor.h>
  44 #include <sys/ib/ib_pkt_hdrs.h>
  45 
  46 static int tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp,
  47     tavor_rsrc_t *qpc);
  48 static int tavor_qpn_avl_compare(const void *q, const void *e);
  49 static int tavor_special_qp_rsrc_alloc(tavor_state_t *state,
  50     ibt_sqp_type_t type, uint_t port, tavor_rsrc_t **qp_rsrc);
  51 static int tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
  52     uint_t port);
  53 static void tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
  54     tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
  55 
  56 /*
  57  * tavor_qp_alloc()
  58  *    Context: Can be called only from user or kernel context.
  59  */
  60 int
  61 tavor_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
  62     uint_t sleepflag, tavor_qp_options_t *op)
  63 {
  64         tavor_rsrc_pool_info_t  *rsrc_pool;
  65         tavor_rsrc_t            *qpc, *rsrc, *rdb;
  66         tavor_umap_db_entry_t   *umapdb;
  67         tavor_qphdl_t           qp;
  68         ibt_qp_alloc_attr_t     *attr_p;
  69         ibt_qp_type_t           type;
  70         ibtl_qp_hdl_t           ibt_qphdl;
  71         ibt_chan_sizes_t        *queuesz_p;
  72         ib_qpn_t                *qpn;
  73         tavor_qphdl_t           *qphdl;
  74         ibt_mr_attr_t           mr_attr;
  75         tavor_mr_options_t      mr_op;
  76         tavor_srqhdl_t          srq;
  77         tavor_pdhdl_t           pd;
  78         tavor_cqhdl_t           sq_cq, rq_cq;
  79         tavor_mrhdl_t           mr;
  80         uint64_t                value, qp_desc_off;
  81         uint32_t                *sq_buf, *rq_buf;
  82         uint32_t                log_qp_sq_size, log_qp_rq_size;
  83         uint32_t                sq_size, rq_size;
  84         uint32_t                sq_wqe_size, rq_wqe_size;
  85         uint32_t                max_rdb, max_sgl, uarpg;
  86         uint_t                  wq_location, dma_xfer_mode, qp_is_umap;
  87         uint_t                  qp_srq_en;
  88         int                     status, flag;
  89         char                    *errormsg;
  90 
  91         TAVOR_TNF_ENTER(tavor_qp_alloc);
  92 
  93         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
  94 
  95         /*
  96          * Check the "options" flag.  Currently this flag tells the driver
  97          * whether or not the QP's work queues should be come from normal
  98          * system memory or whether they should be allocated from DDR memory.
  99          */
 100         if (op == NULL) {
 101                 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
 102         } else {
 103                 wq_location = op->qpo_wq_loc;
 104         }
 105 
 106         /*
 107          * Extract the necessary info from the tavor_qp_info_t structure
 108          */
 109         attr_p    = qpinfo->qpi_attrp;
 110         type      = qpinfo->qpi_type;
 111         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 112         queuesz_p = qpinfo->qpi_queueszp;
 113         qpn       = qpinfo->qpi_qpn;
 114         qphdl     = &qpinfo->qpi_qphdl;
 115 
 116         /*
 117          * Determine whether QP is being allocated for userland access or
 118          * whether it is being allocated for kernel access.  If the QP is
 119          * being allocated for userland access, then lookup the UAR doorbell
 120          * page number for the current process.  Note:  If this is not found
 121          * (e.g. if the process has not previously open()'d the Tavor driver),
 122          * then an error is returned.
 123          */
 124         qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 1 : 0;
 125         if (qp_is_umap) {
 126                 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
 127                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 128                 if (status != DDI_SUCCESS) {
 129                         /* Set "status" and "errormsg" and goto failure */
 130                         TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
 131                         goto qpalloc_fail;
 132                 }
 133                 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
 134         }
 135 
 136         /*
 137          * Determine whether QP is being associated with an SRQ
 138          */
 139         qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
 140         if (qp_srq_en) {
 141                 /*
 142                  * Check for valid SRQ handle pointers
 143                  */
 144                 if (attr_p->qp_ibc_srq_hdl == NULL) {
 145                         /* Set "status" and "errormsg" and goto failure */
 146                         TAVOR_TNF_FAIL(IBT_SRQ_HDL_INVALID,
 147                             "invalid SRQ handle");
 148                         goto qpalloc_fail;
 149                 }
 150                 srq = (tavor_srqhdl_t)attr_p->qp_ibc_srq_hdl;
 151         }
 152 
 153         /*
 154          * Check for valid QP service type (only UD/RC/UC supported)
 155          */
 156         if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
 157             (type != IBT_UC_RQP))) {
 158                 /* Set "status" and "errormsg" and goto failure */
 159                 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid serv type");
 160                 goto qpalloc_fail;
 161         }
 162 
 163         /*
 164          * Only RC is supported on an SRQ -- This is a Tavor hardware
 165          * limitation.  Arbel native mode will not have this shortcoming.
 166          */
 167         if (qp_srq_en && type != IBT_RC_RQP) {
 168                 /* Set "status" and "errormsg" and goto failure */
 169                 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid serv type with SRQ");
 170                 goto qpalloc_fail;
 171         }
 172 
 173         /*
 174          * Check for valid PD handle pointer
 175          */
 176         if (attr_p->qp_pd_hdl == NULL) {
 177                 /* Set "status" and "errormsg" and goto failure */
 178                 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
 179                 goto qpalloc_fail;
 180         }
 181         pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;
 182 
 183         /*
 184          * If on an SRQ, check to make sure the PD is the same
 185          */
 186         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
 187                 /* Set "status" and "errormsg" and goto failure */
 188                 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
 189                 goto qpalloc_fail;
 190         }
 191 
 192         /* Increment the reference count on the protection domain (PD) */
 193         tavor_pd_refcnt_inc(pd);
 194 
 195         /*
 196          * Check for valid CQ handle pointers
 197          */
 198         if ((attr_p->qp_ibc_scq_hdl == NULL) ||
 199             (attr_p->qp_ibc_rcq_hdl == NULL)) {
 200                 /* Set "status" and "errormsg" and goto failure */
 201                 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
 202                 goto qpalloc_fail1;
 203         }
 204         sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 205         rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 206 
 207         /*
 208          * Increment the reference count on the CQs.  One or both of these
 209          * could return error if we determine that the given CQ is already
 210          * being used with a special (SMI/GSI) QP.
 211          */
 212         status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_NORMAL);
 213         if (status != DDI_SUCCESS) {
 214                 /* Set "status" and "errormsg" and goto failure */
 215                 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
 216                 goto qpalloc_fail1;
 217         }
 218         status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_NORMAL);
 219         if (status != DDI_SUCCESS) {
 220                 /* Set "status" and "errormsg" and goto failure */
 221                 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
 222                 goto qpalloc_fail2;
 223         }
 224 
 225         /*
 226          * Allocate an QP context entry.  This will be filled in with all
 227          * the necessary parameters to define the Queue Pair.  Unlike
 228          * other Tavor hardware resources, ownership is not immediately
 229          * given to hardware in the final step here.  Instead, we must
 230          * wait until the QP is later transitioned to the "Init" state before
 231          * passing the QP to hardware.  If we fail here, we must undo all
 232          * the reference count (CQ and PD).
 233          */
 234         status = tavor_rsrc_alloc(state, TAVOR_QPC, 1, sleepflag, &qpc);
 235         if (status != DDI_SUCCESS) {
 236                 /* Set "status" and "errormsg" and goto failure */
 237                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP context");
 238                 goto qpalloc_fail3;
 239         }
 240 
 241         /*
 242          * Allocate the software structure for tracking the queue pair
 243          * (i.e. the Tavor Queue Pair handle).  If we fail here, we must
 244          * undo the reference counts and the previous resource allocation.
 245          */
 246         status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
 247         if (status != DDI_SUCCESS) {
 248                 /* Set "status" and "errormsg" and goto failure */
 249                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP handle");
 250                 goto qpalloc_fail4;
 251         }
 252         qp = (tavor_qphdl_t)rsrc->tr_addr;
 253         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
 254 
 255         /*
 256          * Calculate the QP number from QPC index.  This routine handles
 257          * all of the operations necessary to keep track of used, unused,
 258          * and released QP numbers.
 259          */
 260         status = tavor_qp_create_qpn(state, qp, qpc);
 261         if (status != DDI_SUCCESS) {
 262                 /* Set "status" and "errormsg" and goto failure */
 263                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QPN create");
 264                 goto qpalloc_fail5;
 265         }
 266 
 267         /*
 268          * If this will be a user-mappable QP, then allocate an entry for
 269          * the "userland resources database".  This will later be added to
 270          * the database (after all further QP operations are successful).
 271          * If we fail here, we must undo the reference counts and the
 272          * previous resource allocation.
 273          */
 274         if (qp_is_umap) {
 275                 umapdb = tavor_umap_db_alloc(state->ts_instance, qp->qp_qpnum,
 276                     MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
 277                 if (umapdb == NULL) {
 278                         /* Set "status" and "errormsg" and goto failure */
 279                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
 280                         goto qpalloc_fail6;
 281                 }
 282         }
 283 
 284         /*
 285          * If this is an RC QP, then pre-allocate the maximum number of RDB
 286          * entries.  This allows us to ensure that we can later cover all
 287          * the resources needed by hardware for handling multiple incoming
 288          * RDMA Reads.  Note: These resources are obviously not always
 289          * necessary.  They are allocated here anyway.  Someday maybe this
 290          * can be modified to allocate these on-the-fly (i.e. only if RDMA
 291          * Read or Atomic operations are enabled) XXX
 292          * If we fail here, we have a bunch of resource and reference count
 293          * cleanup to do.
 294          */
 295         if (type == IBT_RC_RQP) {
 296                 max_rdb = state->ts_cfg_profile->cp_hca_max_rdma_in_qp;
 297                 status = tavor_rsrc_alloc(state, TAVOR_RDB, max_rdb,
 298                     sleepflag, &rdb);
 299                 if (status != DDI_SUCCESS) {
 300                         /* Set "status" and "errormsg" and goto failure */
 301                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed RDB");
 302                         goto qpalloc_fail7;
 303                 }
 304                 qp->qp_rdbrsrcp = rdb;
 305                 /* Calculate offset (into DDR memory) of RDB entries */
 306                 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB];
 307                 qp->qp_rdb_ddraddr = (uintptr_t)rsrc_pool->rsrc_ddr_offset +
 308                     (rdb->tr_indx << TAVOR_RDB_SIZE_SHIFT);
 309         }
 310 
 311         /*
 312          * Calculate the appropriate size for the work queues.
 313          * Note:  All Tavor QP work queues must be a power-of-2 in size.  Also
 314          * they may not be any smaller than TAVOR_QP_MIN_SIZE.  This step is
 315          * to round the requested size up to the next highest power-of-2
 316          */
 317         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
 318         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
 319         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
 320         if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) {
 321                 log_qp_sq_size = log_qp_sq_size - 1;
 322         }
 323         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 324         if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
 325                 log_qp_rq_size = log_qp_rq_size - 1;
 326         }
 327 
 328         /*
 329          * Next we verify that the rounded-up size is valid (i.e. consistent
 330          * with the device limits and/or software-configured limits).  If not,
 331          * then obviously we have a lot of cleanup to do before returning.
 332          */
 333         if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
 334             (!qp_srq_en && (log_qp_rq_size >
 335             state->ts_cfg_profile->cp_log_max_qp_sz))) {
 336                 /* Set "status" and "errormsg" and goto failure */
 337                 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max QP size");
 338                 goto qpalloc_fail8;
 339         }
 340 
 341         /*
 342          * Next we verify that the requested number of SGL is valid (i.e.
 343          * consistent with the device limits and/or software-configured
 344          * limits).  If not, then obviously the same cleanup needs to be done.
 345          */
 346         max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
 347         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 348             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_sgl))) {
 349                 /* Set "status" and "errormsg" and goto failure */
 350                 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max QP SGL");
 351                 goto qpalloc_fail8;
 352         }
 353 
 354         /*
 355          * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
 356          * This will depend on the requested number of SGLs.  Note: this
 357          * has the side-effect of also calculating the real number of SGLs
 358          * (for the calculated WQE size).
 359          *
 360          * For QP's on an SRQ, we set these to 0.
 361          */
 362         if (qp_srq_en) {
 363                 qp->qp_rq_log_wqesz = 0;
 364                 qp->qp_rq_sgl = 0;
 365         } else {
 366                 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 367                     TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz,
 368                     &qp->qp_rq_sgl);
 369         }
 370         tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 371             TAVOR_QP_WQ_TYPE_SENDQ, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 372 
 373         /*
 374          * Allocate the memory for QP work queues.  Note:  The location from
 375          * which we will allocate these work queues has been passed in
 376          * through the tavor_qp_options_t structure.  Since Tavor work queues
 377          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 378          * the work queue memory is very important.  We used to allocate
 379          * work queues (the combined receive and send queues) so that they
 380          * would be aligned on their combined size.  That alignment guaranteed
 381          * that they would never cross the 4GB boundary (Tavor work queues
 382          * are on the order of MBs at maximum).  Now we are able to relax
 383          * this alignment constraint by ensuring that the IB address assigned
 384          * to the queue memory (as a result of the tavor_mr_register() call)
 385          * is offset from zero.
 386          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 387          * guarantee the alignment, but when attempting to use IOMMU bypass
 388          * mode we found that we were not allowed to specify any alignment
 389          * that was more restrictive than the system page size.
 390          * So we avoided this constraint by passing two alignment values,
 391          * one for the memory allocation itself and the other for the DMA
 392          * handle (for later bind).  This used to cause more memory than
 393          * necessary to be allocated (in order to guarantee the more
 394          * restrictive alignment contraint).  But be guaranteeing the
 395          * zero-based IB virtual address for the queue, we are able to
 396          * conserve this memory.
 397          * Note: If QP is not user-mappable, then it may come from either
 398          * kernel system memory or from HCA-attached local DDR memory.
 399          */
 400         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 401         sq_size     = (1 << log_qp_sq_size) * sq_wqe_size;
 402 
 403         /* QP on SRQ sets these to 0 */
 404         if (qp_srq_en) {
 405                 rq_wqe_size = 0;
 406                 rq_size     = 0;
 407         } else {
 408                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
 409                 rq_size     = (1 << log_qp_rq_size) * rq_wqe_size;
 410         }
 411 
 412         qp->qp_wqinfo.qa_size = sq_size + rq_size;
 413         qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
 414         qp->qp_wqinfo.qa_bind_align  = max(sq_wqe_size, rq_wqe_size);
 415         if (qp_is_umap) {
 416                 qp->qp_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
 417         } else {
 418                 qp->qp_wqinfo.qa_location = wq_location;
 419         }
 420         status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
 421         if (status != DDI_SUCCESS) {
 422                 /* Set "status" and "errormsg" and goto failure */
 423                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed work queue");
 424                 goto qpalloc_fail8;
 425         }
 426         if (sq_wqe_size > rq_wqe_size) {
 427                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
 428 
 429                 /*
 430                  * If QP's on an SRQ, we set the rq_buf to NULL
 431                  */
 432                 if (qp_srq_en)
 433                         rq_buf = NULL;
 434                 else
 435                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
 436         } else {
 437                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
 438                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
 439         }
 440 
 441         /*
 442          * Register the memory for the QP work queues.  The memory for the
 443          * QP must be registered in the Tavor TPT tables.  This gives us the
 444          * LKey to specify in the QP context later.  Note: The memory for
 445          * Tavor work queues (both Send and Recv) must be contiguous and
 446          * registered as a single memory region.  Note also: If the work
 447          * queue is to be allocated from DDR memory, then only a "bypass"
 448          * mapping is appropriate.  And if the QP memory is user-mappable,
 449          * then we force DDI_DMA_CONSISTENT mapping.
 450          * Also, in order to meet the alignment restriction, we pass the
 451          * "mro_bind_override_addr" flag in the call to tavor_mr_register().
 452          * This guarantees that the resulting IB vaddr will be zero-based
 453          * (modulo the offset into the first page).
 454          * If we fail here, we still have the bunch of resource and reference
 455          * count cleanup to do.
 456          */
 457         flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
 458             IBT_MR_NOSLEEP;
 459         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
 460         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
 461         mr_attr.mr_as       = NULL;
 462         mr_attr.mr_flags    = flag;
 463         if (qp_is_umap) {
 464                 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
 465         } else {
 466                 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
 467                         mr_op.mro_bind_type =
 468                             state->ts_cfg_profile->cp_iommu_bypass;
 469                         dma_xfer_mode =
 470                             state->ts_cfg_profile->cp_streaming_consistent;
 471                         if (dma_xfer_mode == DDI_DMA_STREAMING) {
 472                                 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
 473                         }
 474                 } else {
 475                         mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
 476                 }
 477         }
 478         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
 479         mr_op.mro_bind_override_addr = 1;
 480         status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
 481         if (status != DDI_SUCCESS) {
 482                 /* Set "status" and "errormsg" and goto failure */
 483                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
 484                 goto qpalloc_fail9;
 485         }
 486 
 487         /*
 488          * Calculate the offset between the kernel virtual address space
 489          * and the IB virtual address space.  This will be used when
 490          * posting work requests to properly initialize each WQE.
 491          */
 492         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 493             (uint64_t)mr->mr_bindinfo.bi_addr;
 494 
 495         /*
 496          * Fill in all the return arguments (if necessary).  This includes
 497          * real work queue sizes, real SGLs, and QP number
 498          */
 499         if (queuesz_p != NULL) {
 500                 queuesz_p->cs_sq     = (1 << log_qp_sq_size);
 501                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
 502 
 503                 /* QP on an SRQ set these to 0 */
 504                 if (qp_srq_en) {
 505                         queuesz_p->cs_rq     = 0;
 506                         queuesz_p->cs_rq_sgl = 0;
 507                 } else {
 508                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
 509                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
 510                 }
 511         }
 512         if (qpn != NULL) {
 513                 *qpn = (ib_qpn_t)qp->qp_qpnum;
 514         }
 515 
 516         /*
 517          * Fill in the rest of the Tavor Queue Pair handle.  We can update
 518          * the following fields for use in further operations on the QP.
 519          */
 520         qp->qp_qpcrsrcp              = qpc;
 521         qp->qp_rsrcp         = rsrc;
 522         qp->qp_state         = TAVOR_QP_RESET;
 523         qp->qp_pdhdl         = pd;
 524         qp->qp_mrhdl         = mr;
 525         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
 526             TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
 527         qp->qp_is_special    = 0;
 528         qp->qp_is_umap               = qp_is_umap;
 529         qp->qp_uarpg         = (qp->qp_is_umap) ? uarpg : 0;
 530         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
 531         qp->qp_sq_cqhdl              = sq_cq;
 532         qp->qp_sq_lastwqeaddr        = NULL;
 533         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
 534         qp->qp_sq_buf                = sq_buf;
 535         qp->qp_desc_off              = qp_desc_off;
 536         qp->qp_rq_cqhdl              = rq_cq;
 537         qp->qp_rq_lastwqeaddr        = NULL;
 538         qp->qp_rq_buf                = rq_buf;
 539 
 540         /* QP on an SRQ sets this to 0 */
 541         if (qp_srq_en) {
 542                 qp->qp_rq_bufsz              = 0;
 543         } else {
 544                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
 545         }
 546 
 547         qp->qp_forward_sqd_event  = 0;
 548         qp->qp_sqd_still_draining = 0;
 549         qp->qp_hdlrarg               = (void *)ibt_qphdl;
 550         qp->qp_mcg_refcnt    = 0;
 551 
 552         /*
 553          * If this QP is to be associated with an SRQ, then set the SRQ handle
 554          * appropriately.
 555          */
 556         if (qp_srq_en) {
 557                 qp->qp_srqhdl = srq;
 558                 qp->qp_srq_en = TAVOR_QP_SRQ_ENABLED;
 559                 tavor_srq_refcnt_inc(qp->qp_srqhdl);
 560         } else {
 561                 qp->qp_srqhdl = NULL;
 562                 qp->qp_srq_en = TAVOR_QP_SRQ_DISABLED;
 563         }
 564 
 565         /* Determine if later ddi_dma_sync will be necessary */
 566         qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);
 567 
 568         /* Determine the QP service type */
 569         if (type == IBT_RC_RQP) {
 570                 qp->qp_serv_type = TAVOR_QP_RC;
 571         } else if (type == IBT_UD_RQP) {
 572                 qp->qp_serv_type = TAVOR_QP_UD;
 573         } else {
 574                 qp->qp_serv_type = TAVOR_QP_UC;
 575         }
 576 
 577         /* Zero out the QP context */
 578         bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));
 579 
 580         /*
 581          * Put QP handle in Tavor QPNum-to-QPHdl list.  Then fill in the
 582          * "qphdl" and return success
 583          */
 584         ASSERT(state->ts_qphdl[qpc->tr_indx] == NULL);
 585         state->ts_qphdl[qpc->tr_indx] = qp;
 586 
 587         /*
 588          * If this is a user-mappable QP, then we need to insert the previously
 589          * allocated entry into the "userland resources database".  This will
 590          * allow for later lookup during devmap() (i.e. mmap()) calls.
 591          */
 592         if (qp_is_umap) {
 593                 tavor_umap_db_add(umapdb);
 594         }
 595 
 596         *qphdl = qp;
 597 
 598         TAVOR_TNF_EXIT(tavor_qp_alloc);
 599         return (DDI_SUCCESS);
 600 
 601 /*
 602  * The following is cleanup for all possible failure cases in this routine
 603  */
 604 qpalloc_fail9:
 605         tavor_queue_free(state, &qp->qp_wqinfo);
 606 qpalloc_fail8:
 607         if (type == IBT_RC_RQP) {
 608                 tavor_rsrc_free(state, &rdb);
 609         }
 610 qpalloc_fail7:
 611         if (qp_is_umap) {
 612                 tavor_umap_db_free(umapdb);
 613         }
 614 qpalloc_fail6:
 615         /*
 616          * Releasing the QPN will also free up the QPC context.  Update
 617          * the QPC context pointer to indicate this.
 618          */
 619         tavor_qp_release_qpn(state, qp->qp_qpn_hdl, TAVOR_QPN_RELEASE);
 620         qpc = NULL;
 621 qpalloc_fail5:
 622         tavor_rsrc_free(state, &rsrc);
 623 qpalloc_fail4:
 624         if (qpc) {
 625                 tavor_rsrc_free(state, &qpc);
 626         }
 627 qpalloc_fail3:
 628         tavor_cq_refcnt_dec(rq_cq);
 629 qpalloc_fail2:
 630         tavor_cq_refcnt_dec(sq_cq);
 631 qpalloc_fail1:
 632         tavor_pd_refcnt_dec(pd);
 633 qpalloc_fail:
 634         TNF_PROBE_1(tavor_qp_alloc_fail, TAVOR_TNF_ERROR, "",
 635             tnf_string, msg, errormsg);
 636         TAVOR_TNF_EXIT(tavor_qp_alloc);
 637         return (status);
 638 }
 639 
 640 
 641 
 642 /*
 643  * tavor_special_qp_alloc()
 644  *    Context: Can be called only from user or kernel context.
 645  */
 646 int
 647 tavor_special_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
 648     uint_t sleepflag, tavor_qp_options_t *op)
 649 {
 650         tavor_rsrc_t            *qpc, *rsrc;
 651         tavor_qphdl_t           qp;
 652         ibt_qp_alloc_attr_t     *attr_p;
 653         ibt_sqp_type_t          type;
 654         uint8_t                 port;
 655         ibtl_qp_hdl_t           ibt_qphdl;
 656         ibt_chan_sizes_t        *queuesz_p;
 657         tavor_qphdl_t           *qphdl;
 658         ibt_mr_attr_t           mr_attr;
 659         tavor_mr_options_t      mr_op;
 660         tavor_pdhdl_t           pd;
 661         tavor_cqhdl_t           sq_cq, rq_cq;
 662         tavor_mrhdl_t           mr;
 663         uint64_t                qp_desc_off;
 664         uint32_t                *sq_buf, *rq_buf;
 665         uint32_t                log_qp_sq_size, log_qp_rq_size;
 666         uint32_t                sq_size, rq_size, max_sgl;
 667         uint32_t                sq_wqe_size, rq_wqe_size;
 668         uint_t                  wq_location, dma_xfer_mode;
 669         int                     status, flag;
 670         char                    *errormsg;
 671 
 672         TAVOR_TNF_ENTER(tavor_special_qp_alloc);
 673 
 674         /*
 675          * Check the "options" flag.  Currently this flag tells the driver
         * whether or not the QP's work queues should come from normal
 677          * system memory or whether they should be allocated from DDR memory.
 678          */
 679         if (op == NULL) {
 680                 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
 681         } else {
 682                 wq_location = op->qpo_wq_loc;
 683         }
 684 
 685         /*
 686          * Extract the necessary info from the tavor_qp_info_t structure
 687          */
 688         attr_p    = qpinfo->qpi_attrp;
 689         type      = qpinfo->qpi_type;
 690         port      = qpinfo->qpi_port;
 691         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 692         queuesz_p = qpinfo->qpi_queueszp;
 693         qphdl     = &qpinfo->qpi_qphdl;
 694 
 695         /*
 696          * Check for valid special QP type (only SMI & GSI supported)
 697          */
 698         if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
 699                 /* Set "status" and "errormsg" and goto failure */
 700                 TAVOR_TNF_FAIL(IBT_QP_SPECIAL_TYPE_INVALID, "invalid QP type");
 701                 goto spec_qpalloc_fail;
 702         }
 703 
 704         /*
 705          * Check for valid port number
 706          */
 707         if (!tavor_portnum_is_valid(state, port)) {
 708                 /* Set "status" and "errormsg" and goto failure */
 709                 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
 710                 goto spec_qpalloc_fail;
 711         }
 712         port = port - 1;
 713 
 714         /*
 715          * Check for valid PD handle pointer
 716          */
 717         if (attr_p->qp_pd_hdl == NULL) {
 718                 /* Set "status" and "errormsg" and goto failure */
 719                 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
 720                 goto spec_qpalloc_fail;
 721         }
 722         pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;
 723 
 724         /* Increment the reference count on the PD */
 725         tavor_pd_refcnt_inc(pd);
 726 
 727         /*
 728          * Check for valid CQ handle pointers
 729          */
 730         if ((attr_p->qp_ibc_scq_hdl == NULL) ||
 731             (attr_p->qp_ibc_rcq_hdl == NULL)) {
 732                 /* Set "status" and "errormsg" and goto failure */
 733                 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
 734                 goto spec_qpalloc_fail1;
 735         }
 736         sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 737         rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 738 
 739         /*
 740          * Increment the reference count on the CQs.  One or both of these
 741          * could return error if we determine that the given CQ is already
 742          * being used with a non-special QP (i.e. a normal QP).
 743          */
 744         status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_SPECIAL);
 745         if (status != DDI_SUCCESS) {
 746                 /* Set "status" and "errormsg" and goto failure */
 747                 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
 748                 goto spec_qpalloc_fail1;
 749         }
 750         status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_SPECIAL);
 751         if (status != DDI_SUCCESS) {
 752                 /* Set "status" and "errormsg" and goto failure */
 753                 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
 754                 goto spec_qpalloc_fail2;
 755         }
 756 
 757         /*
 758          * Allocate the special QP resources.  Essentially, this allocation
 759          * amounts to checking if the request special QP has already been
 760          * allocated.  If successful, the QP context return is an actual
 761          * QP context that has been "aliased" to act as a special QP of the
 762          * appropriate type (and for the appropriate port).  Just as in
 763          * tavor_qp_alloc() above, ownership for this QP context is not
 764          * immediately given to hardware in the final step here.  Instead, we
 765          * wait until the QP is later transitioned to the "Init" state before
 766          * passing the QP to hardware.  If we fail here, we must undo all
 767          * the reference count (CQ and PD).
 768          */
 769         status = tavor_special_qp_rsrc_alloc(state, type, port, &qpc);
 770         if (status != DDI_SUCCESS) {
 771                 /* Set "status" and "errormsg" and goto failure */
 772                 TAVOR_TNF_FAIL(status, "failed special QP rsrc");
 773                 goto spec_qpalloc_fail3;
 774         }
 775 
 776         /*
 777          * Allocate the software structure for tracking the special queue
 778          * pair (i.e. the Tavor Queue Pair handle).  If we fail here, we
 779          * must undo the reference counts and the previous resource allocation.
 780          */
 781         status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
 782         if (status != DDI_SUCCESS) {
 783                 /* Set "status" and "errormsg" and goto failure */
 784                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP handle");
 785                 goto spec_qpalloc_fail4;
 786         }
 787         qp = (tavor_qphdl_t)rsrc->tr_addr;
 788         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
 789 
 790         /*
 791          * Actual QP number is a combination of the index of the QPC and
 792          * the port number.  This is because the special QP contexts must
 793          * be allocated two-at-a-time.
 794          */
 795         qp->qp_qpnum = qpc->tr_indx + port;
 796 
 797         /*
 798          * Calculate the appropriate size for the work queues.
 799          * Note:  All Tavor QP work queues must be a power-of-2 in size.  Also
 800          * they may not be any smaller than TAVOR_QP_MIN_SIZE.  This step is
 801          * to round the requested size up to the next highest power-of-2
 802          */
 803         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
 804         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
 805         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
 806         if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) {
 807                 log_qp_sq_size = log_qp_sq_size - 1;
 808         }
 809         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 810         if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
 811                 log_qp_rq_size = log_qp_rq_size - 1;
 812         }
 813 
 814         /*
 815          * Next we verify that the rounded-up size is valid (i.e. consistent
 816          * with the device limits and/or software-configured limits).  If not,
 817          * then obviously we have a bit of cleanup to do before returning.
 818          */
 819         if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
 820             (log_qp_rq_size > state->ts_cfg_profile->cp_log_max_qp_sz)) {
 821                 /* Set "status" and "errormsg" and goto failure */
 822                 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max QP size");
 823                 goto spec_qpalloc_fail5;
 824         }
 825 
 826         /*
 827          * Next we verify that the requested number of SGL is valid (i.e.
 828          * consistent with the device limits and/or software-configured
 829          * limits).  If not, then obviously the same cleanup needs to be done.
 830          */
 831         max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
 832         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 833             (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
 834                 /* Set "status" and "errormsg" and goto failure */
 835                 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max QP SGL");
 836                 goto spec_qpalloc_fail5;
 837         }
 838 
 839         /*
 840          * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
 841          * This will depend on the requested number of SGLs.  Note: this
 842          * has the side-effect of also calculating the real number of SGLs
 843          * (for the calculated WQE size).
 844          */
 845         tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 846             TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 847         if (type == IBT_SMI_SQP) {
 848                 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 849                     TAVOR_QP_WQ_TYPE_SENDMLX_QP0, &qp->qp_sq_log_wqesz,
 850                     &qp->qp_sq_sgl);
 851         } else {
 852                 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 853                     TAVOR_QP_WQ_TYPE_SENDMLX_QP1, &qp->qp_sq_log_wqesz,
 854                     &qp->qp_sq_sgl);
 855         }
 856 
 857         /*
 858          * Allocate the memory for QP work queues.  Note:  The location from
 859          * which we will allocate these work queues has been passed in
 860          * through the tavor_qp_options_t structure.  Since Tavor work queues
 861          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 862          * the work queue memory is very important.  We used to allocate
 863          * work queues (the combined receive and send queues) so that they
 864          * would be aligned on their combined size.  That alignment guaranteed
 865          * that they would never cross the 4GB boundary (Tavor work queues
 866          * are on the order of MBs at maximum).  Now we are able to relax
 867          * this alignment constraint by ensuring that the IB address assigned
 868          * to the queue memory (as a result of the tavor_mr_register() call)
 869          * is offset from zero.
 870          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 871          * guarantee the alignment, but when attempting to use IOMMU bypass
 872          * mode we found that we were not allowed to specify any alignment
 873          * that was more restrictive than the system page size.
 874          * So we avoided this constraint by passing two alignment values,
 875          * one for the memory allocation itself and the other for the DMA
 876          * handle (for later bind).  This used to cause more memory than
 877          * necessary to be allocated (in order to guarantee the more
 878          * restrictive alignment contraint).  But be guaranteeing the
 879          * zero-based IB virtual address for the queue, we are able to
 880          * conserve this memory.
 881          */
 882         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 883         rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
 884         sq_size     = (1 << log_qp_sq_size) * sq_wqe_size;
 885         rq_size     = (1 << log_qp_rq_size) * rq_wqe_size;
 886         qp->qp_wqinfo.qa_size          = sq_size + rq_size;
 887         qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
 888         qp->qp_wqinfo.qa_bind_align  = max(sq_wqe_size, rq_wqe_size);
 889         qp->qp_wqinfo.qa_location = wq_location;
 890         status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
 891         if (status != NULL) {
 892                 /* Set "status" and "errormsg" and goto failure */
 893                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed work queue");
 894                 goto spec_qpalloc_fail5;
 895         }
 896         if (sq_wqe_size > rq_wqe_size) {
 897                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
 898                 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
 899         } else {
 900                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
 901                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
 902         }
 903 
 904         /*
 905          * Register the memory for the special QP work queues.  The memory for
 906          * the special QP must be registered in the Tavor TPT tables.  This
 907          * gives us the LKey to specify in the QP context later.  Note: The
 908          * memory for Tavor work queues (both Send and Recv) must be contiguous
 909          * and registered as a single memory region.  Note also: If the work
 910          * queue is to be allocated from DDR memory, then only a "bypass"
 911          * mapping is appropriate.
 912          * Also, in order to meet the alignment restriction, we pass the
 913          * "mro_bind_override_addr" flag in the call to tavor_mr_register().
 914          * This guarantees that the resulting IB vaddr will be zero-based
 915          * (modulo the offset into the first page).
 916          * If we fail here, we have a bunch of resource and reference count
 917          * cleanup to do.
 918          */
 919         flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
 920             IBT_MR_NOSLEEP;
 921         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
 922         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
 923         mr_attr.mr_as       = NULL;
 924         mr_attr.mr_flags    = flag;
 925         if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
 926                 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
 927 
 928                 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
 929                 if (dma_xfer_mode == DDI_DMA_STREAMING) {
 930                         mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
 931                 }
 932         } else {
 933                 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
 934         }
 935         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
 936         mr_op.mro_bind_override_addr = 1;
 937         status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
 938         if (status != DDI_SUCCESS) {
 939                 /* Set "status" and "errormsg" and goto failure */
 940                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
 941                 goto spec_qpalloc_fail6;
 942         }
 943 
 944         /*
 945          * Calculate the offset between the kernel virtual address space
 946          * and the IB virtual address space.  This will be used when
 947          * posting work requests to properly initialize each WQE.
 948          */
 949         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 950             (uint64_t)mr->mr_bindinfo.bi_addr;
 951 
 952         /*
 953          * Fill in all the return arguments (if necessary).  This includes
 954          * real work queue sizes, real SGLs, and QP number (which will be
 955          * either zero or one, depending on the special QP type)
 956          */
 957         if (queuesz_p != NULL) {
 958                 queuesz_p->cs_sq     = (1 << log_qp_sq_size);
 959                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
 960                 queuesz_p->cs_rq     = (1 << log_qp_rq_size);
 961                 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
 962         }
 963 
 964         /*
 965          * Fill in the rest of the Tavor Queue Pair handle.  We can update
 966          * the following fields for use in further operations on the QP.
 967          */
 968         qp->qp_qpcrsrcp              = qpc;
 969         qp->qp_rsrcp         = rsrc;
 970         qp->qp_state         = TAVOR_QP_RESET;
 971         qp->qp_pdhdl         = pd;
 972         qp->qp_mrhdl         = mr;
 973         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
 974             TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
 975         qp->qp_is_special    = (type == IBT_SMI_SQP) ?
 976             TAVOR_QP_SMI : TAVOR_QP_GSI;
 977         qp->qp_is_umap               = 0;
 978         qp->qp_uarpg         = 0;
 979         qp->qp_sq_cqhdl              = sq_cq;
 980         qp->qp_sq_lastwqeaddr        = NULL;
 981         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
 982         qp->qp_sq_buf                = sq_buf;
 983         qp->qp_desc_off              = qp_desc_off;
 984         qp->qp_rq_cqhdl              = rq_cq;
 985         qp->qp_rq_lastwqeaddr        = NULL;
 986         qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
 987         qp->qp_rq_buf                = rq_buf;
 988         qp->qp_portnum               = port;
 989         qp->qp_pkeyindx              = 0;
 990         qp->qp_hdlrarg               = (void *)ibt_qphdl;
 991         qp->qp_mcg_refcnt    = 0;
 992         qp->qp_srq_en                = 0;
 993         qp->qp_srqhdl                = NULL;
 994 
 995         /* Determine if later ddi_dma_sync will be necessary */
 996         qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);
 997 
 998         /* All special QPs are UD QP service type */
 999         qp->qp_serv_type = TAVOR_QP_UD;
1000 
1001         /* Zero out the QP context */
1002         bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));
1003 
1004         /*
1005          * Put QP handle in Tavor QPNum-to-QPHdl list.  Then fill in the
1006          * "qphdl" and return success
1007          */
1008         ASSERT(state->ts_qphdl[qpc->tr_indx + port] == NULL);
1009         state->ts_qphdl[qpc->tr_indx + port] = qp;
1010 
1011         *qphdl = qp;
1012 
1013         TAVOR_TNF_EXIT(tavor_special_qp_alloc);
1014         return (DDI_SUCCESS);
1015 
1016 /*
1017  * The following is cleanup for all possible failure cases in this routine
1018  */
1019 spec_qpalloc_fail6:
1020         tavor_queue_free(state, &qp->qp_wqinfo);
1021 spec_qpalloc_fail5:
1022         tavor_rsrc_free(state, &rsrc);
1023 spec_qpalloc_fail4:
1024         if (tavor_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1025                 TAVOR_WARNING(state, "failed to free special QP rsrc");
1026         }
1027 spec_qpalloc_fail3:
1028         tavor_cq_refcnt_dec(rq_cq);
1029 spec_qpalloc_fail2:
1030         tavor_cq_refcnt_dec(sq_cq);
1031 spec_qpalloc_fail1:
1032         tavor_pd_refcnt_dec(pd);
1033 spec_qpalloc_fail:
1034         TNF_PROBE_1(tavor_special_qp_alloc_fail, TAVOR_TNF_ERROR, "",
1035             tnf_string, msg, errormsg);
1036         TAVOR_TNF_EXIT(tavor_special_qp_alloc);
1037         return (status);
1038 }
1039 
1040 
1041 /*
1042  * tavor_qp_free()
1043  *    This function frees up the QP resources.  Depending on the value
1044  *    of the "free_qp_flags", the QP number may not be released until
1045  *    a subsequent call to tavor_qp_release_qpn().
1046  *
1047  *    Context: Can be called only from user or kernel context.
1048  */
/* ARGSUSED */
int
tavor_qp_free(tavor_state_t *state, tavor_qphdl_t *qphdl,
    ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
    uint_t sleepflag)
{
	tavor_rsrc_t		*qpc, *rdb, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_qpn_entry_t	*entry;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_cqhdl_t		sq_cq, rq_cq;
	tavor_srqhdl_t		srq;
	tavor_qphdl_t		qp;
	uint64_t		value;
	uint_t			type, port;
	uint_t			maxprot;
	uint_t			qp_srq_en;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_qp_free);

	/*
	 * Pull all the necessary information from the Tavor Queue Pair
	 * handle.  This is necessary here because the resource for the
	 * QP handle is going to be freed up as part of this operation.
	 * In particular, "srq" and "qp_srq_en" must be saved now because
	 * they are still needed after the QP handle resource ("rsrc",
	 * which contains *qp) has been freed below.
	 */
	qp	= *qphdl;
	mutex_enter(&qp->qp_lock);
	qpc	= qp->qp_qpcrsrcp;
	rsrc	= qp->qp_rsrcp;
	pd	= qp->qp_pdhdl;
	srq	= qp->qp_srqhdl;
	mr	= qp->qp_mrhdl;
	rq_cq	= qp->qp_rq_cqhdl;
	sq_cq	= qp->qp_sq_cqhdl;
	rdb	= qp->qp_rdbrsrcp;
	port	= qp->qp_portnum;
	qp_srq_en = qp->qp_srq_en;

	/*
	 * If the QP is part of an MCG, then we fail the qp_free
	 */
	if (qp->qp_mcg_refcnt != 0) {
		mutex_exit(&qp->qp_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "QP part of MCG on free");
		goto qpfree_fail;
	}

	/*
	 * If the QP is not already in "Reset" state, then transition to
	 * "Reset".  This is necessary because software does not reclaim
	 * ownership of the QP context until the QP is in the "Reset" state.
	 * If the ownership transfer fails for any reason, then it is an
	 * indication that something (either in HW or SW) has gone seriously
	 * wrong.  So we print a warning message and return.
	 */
	if (qp->qp_state != TAVOR_QP_RESET) {
		if (tavor_qp_to_reset(state, qp) != DDI_SUCCESS) {
			mutex_exit(&qp->qp_lock);
			TAVOR_WARNING(state, "failed to reset QP context");
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "reset QP context");
			goto qpfree_fail;
		}
		qp->qp_state = TAVOR_QP_RESET;

		/*
		 * Do any additional handling necessary for the transition
		 * to the "Reset" state (e.g. update the WRID lists)
		 */
		tavor_wrid_to_reset_handling(state, qp);
	}

	/*
	 * If this was a user-mappable QP, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
	 * We also need to invalidate the QP tracking information for the
	 * user mapping.
	 */
	if (qp->qp_is_umap) {
		status = tavor_umap_db_find(state->ts_instance, qp->qp_qpnum,
		    MLNX_UMAP_QPMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&qp->qp_lock);
			TAVOR_WARNING(state, "failed to find in database");
			TAVOR_TNF_EXIT(tavor_qp_free);
			return (ibc_get_ci_failure(0));
		}
		tavor_umap_db_free(umapdb);
		if (qp->qp_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(qp->qp_umap_dhp,
			    state->ts_dip, 0, 0, qp->qp_wqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&qp->qp_lock);
				TAVOR_WARNING(state, "failed in QP memory "
				    "devmap_devmem_remap()");
				TAVOR_TNF_EXIT(tavor_qp_free);
				return (ibc_get_ci_failure(0));
			}
			qp->qp_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Tavor QPNum-to-QPHdl list.  This will allow any
	 * in-progress events to detect that the QP corresponding to this
	 * number has been freed.  Note: it does depend in whether we are
	 * freeing a special QP or not.  Special QPs are indexed by QPC
	 * index plus port (see tavor_special_qp_alloc()).
	 */
	if (qp->qp_is_special) {
		state->ts_qphdl[qpc->tr_indx + port] = NULL;
	} else {
		state->ts_qphdl[qpc->tr_indx] = NULL;
	}

	/*
	 * Drop the QP lock
	 *    At this point the lock is no longer necessary.  We cannot
	 *    protect from multiple simultaneous calls to free the same QP.
	 *    In addition, since the QP lock is contained in the QP "software
	 *    handle" resource, which we will free (see below), it is
	 *    important that we have no further references to that memory.
	 */
	mutex_exit(&qp->qp_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))

	/*
	 * Free the QP resources
	 *    Start by deregistering and freeing the memory for work queues.
	 *    Next free any previously allocated context information
	 *    (depending on QP type)
	 *    Finally, decrement the necessary reference counts.
	 * If this fails for any reason, then it is an indication that
	 * something (either in HW or SW) has gone seriously wrong.  So we
	 * print a warning message and return.
	 */
	status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to deregister QP memory");
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed deregister mr");
		goto qpfree_fail;
	}

	/* Free the memory for the QP */
	tavor_queue_free(state, &qp->qp_wqinfo);

	/*
	 * Free up the remainder of the QP resources.  Note: we have a few
	 * different resources to free up depending on whether the QP is a
	 * special QP or not.  As described above, if any of these fail for
	 * any reason it is an indication that something (either in HW or SW)
	 * has gone seriously wrong.  So we print a warning message and
	 * return.
	 */
	if (qp->qp_is_special) {
		type = (qp->qp_is_special == TAVOR_QP_SMI) ?
		    IBT_SMI_SQP : IBT_GSI_SQP;

		/* Free up resources for the special QP */
		status = tavor_special_qp_rsrc_free(state, type, port);
		if (status != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to free special QP rsrc");
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "failed special QP rsrc");
			goto qpfree_fail;
		}

	} else {
		type = qp->qp_serv_type;

		/* Free up the RDB entries resource (RC QPs only) */
		if (type == TAVOR_QP_RC) {
			tavor_rsrc_free(state, &rdb);
		}

		/*
		 * Check the flags and determine whether to release the
		 * QPN or not, based on their value.  For IBC_FREE_QP_ONLY
		 * the QPN entry is handed back to the caller (via "qpnh")
		 * for a later tavor_qp_release_qpn() call; otherwise the
		 * QPN is released here.
		 */
		if (free_qp_flags == IBC_FREE_QP_ONLY) {
			entry = qp->qp_qpn_hdl;
			tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
			    TAVOR_QPN_FREE_ONLY);
			*qpnh = (ibc_qpn_hdl_t)entry;
		} else {
			tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
			    TAVOR_QPN_RELEASE);
		}
	}

	/* Free the Tavor Queue Pair handle ("qp" is invalid after this) */
	tavor_rsrc_free(state, &rsrc);

	/* Decrement the reference counts on CQs, PD and SRQ (if needed) */
	tavor_cq_refcnt_dec(rq_cq);
	tavor_cq_refcnt_dec(sq_cq);
	tavor_pd_refcnt_dec(pd);
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		tavor_srq_refcnt_dec(srq);
	}

	/* Set the qphdl pointer to NULL and return success */
	*qphdl = NULL;

	TAVOR_TNF_EXIT(tavor_qp_free);
	return (DDI_SUCCESS);

qpfree_fail:
	/* "status" and "errormsg" were set by TAVOR_TNF_FAIL before each
	 * jump to this label */
	TNF_PROBE_1(tavor_qp_free_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_qp_free);
	return (status);
}
1273 
1274 
1275 /*
1276  * tavor_qp_query()
1277  *    Context: Can be called from interrupt or base context.
1278  */
1279 int
1280 tavor_qp_query(tavor_state_t *state, tavor_qphdl_t qp,
1281     ibt_qp_query_attr_t *attr_p)
1282 {
1283         ibt_cep_state_t         qp_state;
1284         ibt_qp_ud_attr_t        *ud;
1285         ibt_qp_rc_attr_t        *rc;
1286         ibt_qp_uc_attr_t        *uc;
1287         ibt_cep_flags_t         enable_flags;
1288         tavor_hw_addr_path_t    *qpc_path, *qpc_alt_path;
1289         ibt_cep_path_t          *path_ptr, *alt_path_ptr;
1290         tavor_hw_qpc_t          *qpc;
1291         int                     status;
1292 
1293         TAVOR_TNF_ENTER(tavor_qp_query);
1294 
1295         mutex_enter(&qp->qp_lock);
1296 
1297         /*
1298          * Grab the temporary QPC entry from QP software state
1299          */
1300         qpc = &qp->qpc;
1301 
1302         /* Convert the current Tavor QP state to IBTF QP state */
1303         switch (qp->qp_state) {
1304         case TAVOR_QP_RESET:
1305                 qp_state = IBT_STATE_RESET;             /* "Reset" */
1306                 break;
1307         case TAVOR_QP_INIT:
1308                 qp_state = IBT_STATE_INIT;              /* Initialized */
1309                 break;
1310         case TAVOR_QP_RTR:
1311                 qp_state = IBT_STATE_RTR;               /* Ready to Receive */
1312                 break;
1313         case TAVOR_QP_RTS:
1314                 qp_state = IBT_STATE_RTS;               /* Ready to Send */
1315                 break;
1316         case TAVOR_QP_SQERR:
1317                 qp_state = IBT_STATE_SQE;               /* Send Queue Error */
1318                 break;
1319         case TAVOR_QP_SQD:
1320                 if (qp->qp_sqd_still_draining) {
1321                         qp_state = IBT_STATE_SQDRAIN;   /* SQ Draining */
1322                 } else {
1323                         qp_state = IBT_STATE_SQD;       /* SQ Drained */
1324                 }
1325                 break;
1326         case TAVOR_QP_ERR:
1327                 qp_state = IBT_STATE_ERROR;             /* Error */
1328                 break;
1329         default:
1330                 mutex_exit(&qp->qp_lock);
1331                 TNF_PROBE_1(tavor_qp_query_inv_qpstate_fail,
1332                     TAVOR_TNF_ERROR, "", tnf_uint, qpstate, qp->qp_state);
1333                 TAVOR_TNF_EXIT(tavor_qp_query);
1334                 return (ibc_get_ci_failure(0));
1335         }
1336         attr_p->qp_info.qp_state = qp_state;
1337 
1338         /* SRQ Hook. */
1339         attr_p->qp_srq = NULL;
1340 
1341         /*
1342          * The following QP information is always returned, regardless of
1343          * the current QP state.  Note: Some special handling is necessary
1344          * for calculating the QP number on special QP (QP0 and QP1).
1345          */
1346         attr_p->qp_sq_cq    = qp->qp_sq_cqhdl->cq_hdlrarg;
1347         attr_p->qp_rq_cq    = qp->qp_rq_cqhdl->cq_hdlrarg;
1348         if (qp->qp_is_special) {
1349                 attr_p->qp_qpn = (qp->qp_is_special == TAVOR_QP_SMI) ? 0 : 1;
1350         } else {
1351                 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
1352         }
1353         attr_p->qp_sq_sgl   = qp->qp_sq_sgl;
1354         attr_p->qp_rq_sgl   = qp->qp_rq_sgl;
1355         attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz;
1356         attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
1357 
1358         /*
1359          * If QP is currently in the "Reset" state, then only the above are
1360          * returned
1361          */
1362         if (qp_state == IBT_STATE_RESET) {
1363                 mutex_exit(&qp->qp_lock);
1364                 TAVOR_TNF_EXIT(tavor_qp_query);
1365                 return (DDI_SUCCESS);
1366         }
1367 
1368         /*
1369          * Post QUERY_QP command to firmware
1370          *
1371          * We do a TAVOR_NOSLEEP here because we are holding the "qp_lock".
1372          * Since we may be in the interrupt context (or subsequently raised
1373          * to interrupt level by priority inversion), we do not want to block
1374          * in this routine waiting for success.
1375          */
1376         status = tavor_cmn_query_cmd_post(state, QUERY_QP, qp->qp_qpnum,
1377             qpc, sizeof (tavor_hw_qpc_t), TAVOR_CMD_NOSLEEP_SPIN);
1378         if (status != TAVOR_CMD_SUCCESS) {
1379                 mutex_exit(&qp->qp_lock);
1380                 cmn_err(CE_CONT, "Tavor: QUERY_QP command failed: %08x\n",
1381                     status);
1382                 TNF_PROBE_1(tavor_qp_query_cmd_fail, TAVOR_TNF_ERROR, "",
1383                     tnf_uint, status, status);
1384                 TAVOR_TNF_EXIT(tavor_qp_query);
1385                 return (ibc_get_ci_failure(0));
1386         }
1387 
1388         /*
1389          * Fill in the additional QP info based on the QP's transport type.
1390          */
1391         if (qp->qp_serv_type == TAVOR_QP_UD) {
1392 
1393                 /* Fill in the UD-specific info */
1394                 ud = &attr_p->qp_info.qp_transport.ud;
1395                 ud->ud_qkey  = (ib_qkey_t)qpc->qkey;
1396                 ud->ud_sq_psn        = qpc->next_snd_psn;
1397                 ud->ud_pkey_ix       = qpc->pri_addr_path.pkey_indx;
1398                 ud->ud_port  = qpc->pri_addr_path.portnum;
1399 
1400                 attr_p->qp_info.qp_trans = IBT_UD_SRV;
1401 
1402         } else if (qp->qp_serv_type == TAVOR_QP_RC) {
1403 
1404                 /* Fill in the RC-specific info */
1405                 rc = &attr_p->qp_info.qp_transport.rc;
1406                 rc->rc_sq_psn        = qpc->next_snd_psn;
1407                 rc->rc_rq_psn        = qpc->next_rcv_psn;
1408                 rc->rc_dst_qpn       = qpc->rem_qpn;
1409 
1410                 /* Grab the path migration state information */
1411                 if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
1412                         rc->rc_mig_state = IBT_STATE_MIGRATED;
1413                 } else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
1414                         rc->rc_mig_state = IBT_STATE_REARMED;
1415                 } else {
1416                         rc->rc_mig_state = IBT_STATE_ARMED;
1417                 }
1418                 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
1419                 rc->rc_rdma_ra_in  = (1 << qpc->rra_max);
1420                 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
1421                 rc->rc_path_mtu         = qpc->mtu;
1422                 rc->rc_retry_cnt   = qpc->retry_cnt;
1423 
1424                 /* Get the common primary address path fields */
1425                 qpc_path = &qpc->pri_addr_path;
1426                 path_ptr = &rc->rc_path;
1427                 tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
1428                     TAVOR_ADDRPATH_QP, qp);
1429 
1430                 /* Fill in the additional primary address path fields */
1431                 path_ptr->cep_pkey_ix           = qpc_path->pkey_indx;
1432                 path_ptr->cep_hca_port_num = qpc_path->portnum;
1433                 path_ptr->cep_timeout           = qpc_path->ack_timeout;
1434 
1435                 /* Get the common alternate address path fields */
1436                 qpc_alt_path = &qpc->alt_addr_path;
1437                 alt_path_ptr = &rc->rc_alt_path;
1438                 tavor_get_addr_path(state, qpc_alt_path,
1439                     &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);
1440 
1441                 /* Fill in the additional alternate address path fields */
1442                 alt_path_ptr->cep_pkey_ix    = qpc_alt_path->pkey_indx;
1443                 alt_path_ptr->cep_hca_port_num       = qpc_alt_path->portnum;
1444                 alt_path_ptr->cep_timeout    = qpc_alt_path->ack_timeout;
1445 
1446                 /* Get the RNR retry time from primary path */
1447                 rc->rc_rnr_retry_cnt = qpc_path->rnr_retry;
1448 
1449                 /* Set the enable flags based on RDMA/Atomic enable bits */
1450                 enable_flags = IBT_CEP_NO_FLAGS;
1451                 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
1452                 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
1453                 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
1454                 attr_p->qp_info.qp_flags = enable_flags;
1455 
1456                 attr_p->qp_info.qp_trans = IBT_RC_SRV;
1457 
1458         } else if (qp->qp_serv_type == TAVOR_QP_UC) {
1459 
1460                 /* Fill in the UC-specific info */
1461                 uc = &attr_p->qp_info.qp_transport.uc;
1462                 uc->uc_sq_psn        = qpc->next_snd_psn;
1463                 uc->uc_rq_psn        = qpc->next_rcv_psn;
1464                 uc->uc_dst_qpn       = qpc->rem_qpn;
1465 
1466                 /* Grab the path migration state information */
1467                 if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
1468                         uc->uc_mig_state = IBT_STATE_MIGRATED;
1469                 } else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
1470                         uc->uc_mig_state = IBT_STATE_REARMED;
1471                 } else {
1472                         uc->uc_mig_state = IBT_STATE_ARMED;
1473                 }
1474                 uc->uc_path_mtu = qpc->mtu;
1475 
1476                 /* Get the common primary address path fields */
1477                 qpc_path = &qpc->pri_addr_path;
1478                 path_ptr = &uc->uc_path;
1479                 tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
1480                     TAVOR_ADDRPATH_QP, qp);
1481 
1482                 /* Fill in the additional primary address path fields */
1483                 path_ptr->cep_pkey_ix           = qpc_path->pkey_indx;
1484                 path_ptr->cep_hca_port_num = qpc_path->portnum;
1485 
1486                 /* Get the common alternate address path fields */
1487                 qpc_alt_path = &qpc->alt_addr_path;
1488                 alt_path_ptr = &uc->uc_alt_path;
1489                 tavor_get_addr_path(state, qpc_alt_path,
1490                     &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);
1491 
1492                 /* Fill in the additional alternate address path fields */
1493                 alt_path_ptr->cep_pkey_ix    = qpc_alt_path->pkey_indx;
1494                 alt_path_ptr->cep_hca_port_num       = qpc_alt_path->portnum;
1495 
1496                 /*
1497                  * Set the enable flags based on RDMA enable bits (by
1498                  * definition UC doesn't support Atomic or RDMA Read)
1499                  */
1500                 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
1501                 attr_p->qp_info.qp_flags = enable_flags;
1502 
1503                 attr_p->qp_info.qp_trans = IBT_UC_SRV;
1504 
1505         } else {
1506                 TAVOR_WARNING(state, "unexpected QP transport type");
1507                 mutex_exit(&qp->qp_lock);
1508                 return (ibc_get_ci_failure(0));
1509         }
1510 
1511         /*
1512          * Under certain circumstances it is possible for the Tavor hardware
1513          * to transition to one of the error states without software directly
1514          * knowing about it.  The QueryQP() call is the one place where we
1515          * have an opportunity to sample and update our view of the QP state.
1516          */
1517         if (qpc->state == TAVOR_QP_SQERR) {
1518                 attr_p->qp_info.qp_state = IBT_STATE_SQE;
1519                 qp->qp_state = TAVOR_QP_SQERR;
1520         }
1521         if (qpc->state == TAVOR_QP_ERR) {
1522                 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
1523                 qp->qp_state = TAVOR_QP_ERR;
1524         }
1525         mutex_exit(&qp->qp_lock);
1526 
1527         TAVOR_TNF_EXIT(tavor_qp_query);
1528         return (DDI_SUCCESS);
1529 }
1530 
1531 
1532 /*
1533  * tavor_qp_create_qpn()
1534  *    Context: Can be called from interrupt or base context.
1535  */
static int
tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp, tavor_rsrc_t *qpc)
{
	tavor_qpn_entry_t	query;
	tavor_qpn_entry_t	*entry;
	avl_index_t		where;

	TAVOR_TNF_ENTER(tavor_qp_create_qpn);

	/*
	 * Build a query (for the AVL tree lookup) and attempt to find
	 * a previously added entry that has a matching QPC index.  If
	 * no matching entry is found, then allocate, initialize, and
	 * add an entry to the AVL tree.
	 * If a matching entry is found, then increment its QPN counter
	 * and reference counter.
	 */
	query.qpn_indx = qpc->tr_indx;
	mutex_enter(&state->ts_qpn_avl_lock);
	entry = (tavor_qpn_entry_t *)avl_find(&state->ts_qpn_avl,
	    &query, &where);
	if (entry == NULL) {
		/*
		 * Allocate and initialize a QPN entry, then insert
		 * it into the AVL tree.  KM_NOSLEEP is required here
		 * because this routine may be called from interrupt
		 * context (see the function header above).
		 */
		entry = (tavor_qpn_entry_t *)kmem_zalloc(
		    sizeof (tavor_qpn_entry_t), KM_NOSLEEP);
		if (entry == NULL) {
			mutex_exit(&state->ts_qpn_avl_lock);
			TAVOR_TNF_EXIT(tavor_qp_create_qpn);
			return (DDI_FAILURE);
		}
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))

		entry->qpn_indx		= qpc->tr_indx;
		entry->qpn_refcnt  = 0;
		entry->qpn_counter = 0;

		/* "where" was filled in by the failed avl_find() above */
		avl_insert(&state->ts_qpn_avl, entry, where);
	}

	/*
	 * Make the AVL tree entry point to the QP context resource that
	 * it will be responsible for tracking
	 */
	entry->qpn_qpc = qpc;

	/*
	 * Setup the QP handle to point to the AVL tree entry.  Then
	 * generate the new QP number from the entry's QPN counter value
	 * and the hardware's QP context table index.  The counter occupies
	 * the high-order (unconstrained) bits and the QPC table index the
	 * low-order bits, so reallocating the same QPC index yields a
	 * different QP number each time (see tavor_qphdl_from_qpnum()).
	 */
	qp->qp_qpn_hdl	= entry;
	qp->qp_qpnum = ((entry->qpn_counter <<
	    state->ts_cfg_profile->cp_log_num_qp) | qpc->tr_indx) &
	    TAVOR_QP_MAXNUMBER_MSK;

	/*
	 * Increment the reference counter and QPN counter.  The QPN
	 * counter always indicates the next available number for use.
	 */
	entry->qpn_counter++;
	entry->qpn_refcnt++;

	mutex_exit(&state->ts_qpn_avl_lock);
	TAVOR_TNF_EXIT(tavor_qp_create_qpn);
	return (DDI_SUCCESS);
}
1605 
1606 
1607 /*
1608  * tavor_qp_release_qpn()
1609  *    Context: Can be called only from user or kernel context.
1610  */
void
tavor_qp_release_qpn(tavor_state_t *state, tavor_qpn_entry_t *entry, int flags)
{
	TAVOR_TNF_ENTER(tavor_qp_release_qpn);

	ASSERT(entry != NULL);

	mutex_enter(&state->ts_qpn_avl_lock);

	/*
	 * "flags" selects one of two behaviors (TAVOR_QPN_RELEASE or
	 * TAVOR_QPN_FREE_ONLY); any other value is silently ignored.
	 *
	 * If we are releasing the QP number here, then we decrement the
	 * reference count and check for zero references.  If there are
	 * zero references, then we free the QPC context (if it hadn't
	 * already been freed during a TAVOR_QPN_FREE_ONLY free, i.e. for
	 * reuse with another similar QP number) and remove the tracking
	 * structure from the QP number AVL tree and free the structure.
	 * If we are not releasing the QP number here, then, as long as we
	 * have not exhausted the usefulness of the QPC context (that is,
	 * re-used it too many times without the reference count having
	 * gone to zero), we free up the QPC context for use by another
	 * thread (which will use it to construct a different QP number
	 * from the same QPC table index).
	 */
	if (flags == TAVOR_QPN_RELEASE) {
		entry->qpn_refcnt--;

		/*
		 * If the reference count is zero, then we free the QPC
		 * context (if it hadn't already been freed in an early
		 * step, e.g. TAVOR_QPN_FREE_ONLY) and remove/free the
		 * tracking structure from the QP number AVL tree.
		 * NOTE(review): the NULL check works because
		 * tavor_rsrc_free() takes &entry->qpn_qpc and presumably
		 * clears it -- confirm against tavor_rsrc_free().
		 */
		if (entry->qpn_refcnt == 0) {
			if (entry->qpn_qpc != NULL) {
				tavor_rsrc_free(state, &entry->qpn_qpc);
			}

			/*
			 * If the current entry has served it's useful
			 * purpose (i.e. been reused the maximum allowable
			 * number of times), then remove it from QP number
			 * AVL tree and free it up.  The limit is 2^(24 -
			 * cp_log_num_qp): the number of distinct counter
			 * values that fit in the unconstrained QPN bits
			 * (see tavor_qp_create_qpn()).
			 */
			if (entry->qpn_counter >= (1 <<
			    (24 - state->ts_cfg_profile->cp_log_num_qp))) {
				avl_remove(&state->ts_qpn_avl, entry);
				kmem_free(entry, sizeof (tavor_qpn_entry_t));
			}
		}

	} else if (flags == TAVOR_QPN_FREE_ONLY) {
		/*
		 * Even if we are not freeing the QP number, that will not
		 * always prevent us from releasing the QPC context.  In fact,
		 * since the QPC context only forms part of the whole QPN,
		 * we want to free it up for use by other consumers.  But
		 * if the reference count is non-zero (which it will always
		 * be when we are doing TAVOR_QPN_FREE_ONLY) and the counter
		 * has reached its maximum value, then we cannot reuse the
		 * QPC context until the reference count eventually reaches
		 * zero (in TAVOR_QPN_RELEASE, above).
		 */
		if (entry->qpn_counter < (1 <<
		    (24 - state->ts_cfg_profile->cp_log_num_qp))) {
			tavor_rsrc_free(state, &entry->qpn_qpc);
		}
	}
	mutex_exit(&state->ts_qpn_avl_lock);

	TAVOR_TNF_EXIT(tavor_qp_release_qpn);
}
1682 
1683 
1684 /*
1685  * tavor_qpn_db_compare()
1686  *    Context: Can be called from user or kernel context.
1687  */
1688 static int
1689 tavor_qpn_avl_compare(const void *q, const void *e)
1690 {
1691         tavor_qpn_entry_t       *entry, *query;
1692 
1693         TAVOR_TNF_ENTER(tavor_qpn_avl_compare);
1694 
1695         entry = (tavor_qpn_entry_t *)e;
1696         query = (tavor_qpn_entry_t *)q;
1697 
1698         if (query->qpn_indx < entry->qpn_indx) {
1699                 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1700                 return (-1);
1701         } else if (query->qpn_indx > entry->qpn_indx) {
1702                 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1703                 return (+1);
1704         } else {
1705                 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1706                 return (0);
1707         }
1708 }
1709 
1710 
1711 /*
1712  * tavor_qpn_avl_init()
1713  *    Context: Only called from attach() path context
1714  */
void
tavor_qpn_avl_init(tavor_state_t *state)
{
	TAVOR_TNF_ENTER(tavor_qpn_avl_init);

	/*
	 * Initialize the lock used for QP number (QPN) AVL tree access.
	 * DDI_INTR_PRI is used because the tree is also accessed from
	 * interrupt context (see tavor_qp_create_qpn()).
	 */
	mutex_init(&state->ts_qpn_avl_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->ts_intrmsi_pri));

	/*
	 * Initialize the AVL tree for the QP number (QPN) storage,
	 * keyed/ordered by QPC table index (tavor_qpn_avl_compare()).
	 */
	avl_create(&state->ts_qpn_avl, tavor_qpn_avl_compare,
	    sizeof (tavor_qpn_entry_t),
	    offsetof(tavor_qpn_entry_t, qpn_avlnode));

	TAVOR_TNF_EXIT(tavor_qpn_avl_init);
}
1731 
1732 
1733 /*
1734  * tavor_qpn_avl_fini()
1735  *    Context: Only called from attach() and/or detach() path contexts
1736  */
void
tavor_qpn_avl_fini(tavor_state_t *state)
{
	tavor_qpn_entry_t	*entry;
	void			*cookie;

	TAVOR_TNF_ENTER(tavor_qpn_avl_fini);

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree
	 * that was used for QP number (QPN) tracking.  avl_destroy_nodes()
	 * hands back each node in turn; we free each one before finally
	 * destroying the (now empty) tree itself.
	 */
	cookie = NULL;
	while ((entry = (tavor_qpn_entry_t *)avl_destroy_nodes(
	    &state->ts_qpn_avl, &cookie)) != NULL) {
		kmem_free(entry, sizeof (tavor_qpn_entry_t));
	}
	avl_destroy(&state->ts_qpn_avl);

	/* Destroy the lock used for QP number (QPN) AVL tree access */
	mutex_destroy(&state->ts_qpn_avl_lock);

	TAVOR_TNF_EXIT(tavor_qpn_avl_fini);
}
1761 
1762 
1763 /*
1764  * tavor_qphdl_from_qpnum()
1765  *    Context: Can be called from interrupt or base context.
1766  *
1767  *    This routine is important because changing the unconstrained
1768  *    portion of the QP number is critical to the detection of a
1769  *    potential race condition in the QP event handler code (i.e. the case
1770  *    where a QP is freed and alloc'd again before an event for the
1771  *    "old" QP can be handled).
1772  *
1773  *    While this is not a perfect solution (not sure that one exists)
1774  *    it does help to mitigate the chance that this race condition will
1775  *    cause us to deliver a "stale" event to the new QP owner.  Note:
1776  *    this solution does not scale well because the number of constrained
1777  *    bits increases (and, hence, the number of unconstrained bits
1778  *    decreases) as the number of supported QPs grows.  For small and
1779  *    intermediate values, it should hopefully provide sufficient
1780  *    protection.
1781  */
1782 tavor_qphdl_t
1783 tavor_qphdl_from_qpnum(tavor_state_t *state, uint_t qpnum)
1784 {
1785         uint_t  qpindx, qpmask;
1786 
1787         /* Calculate the QP table index from the qpnum */
1788         qpmask = (1 << state->ts_cfg_profile->cp_log_num_qp) - 1;
1789         qpindx = qpnum & qpmask;
1790         return (state->ts_qphdl[qpindx]);
1791 }
1792 
1793 
1794 /*
1795  * tavor_special_qp_rsrc_alloc
1796  *    Context: Can be called from interrupt or base context.
1797  */
static int
tavor_special_qp_rsrc_alloc(tavor_state_t *state, ibt_sqp_type_t type,
    uint_t port, tavor_rsrc_t **qp_rsrc)
{
	uint_t		mask, flags;
	int		status;

	TAVOR_TNF_ENTER(tavor_special_qp_rsrc_alloc);

	/*
	 * Hand out the shared special QP resource (QP0 or QP1) for "port",
	 * tracking per-port ownership in the ts_spec_qpflags bitmask.
	 * Note: returns IBT_* error codes on failure but DDI_SUCCESS on
	 * success, matching this driver's convention for these helpers.
	 */
	mutex_enter(&state->ts_spec_qplock);
	flags = state->ts_spec_qpflags;
	if (type == IBT_SMI_SQP) {
		/*
		 * Check here to see if the driver has been configured
		 * to instruct the Tavor firmware to handle all incoming
		 * SMP messages (i.e. messages sent to SMA).  If so,
		 * then we will treat QP0 as if it has already been
		 * allocated (for internal use).  Otherwise, if we allow
		 * the allocation to happen, it will cause unexpected
		 * behaviors (e.g. Tavor SMA becomes unresponsive).
		 */
		if (state->ts_cfg_profile->cp_qp0_agents_in_fw != 0) {
			mutex_exit(&state->ts_spec_qplock);
			TNF_PROBE_0(tavor_special_qp0_alloc_already_in_fw,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
			return (IBT_QP_IN_USE);
		}

		/*
		 * If this is the first QP0 allocation (no port currently
		 * holds it), then post a CONF_SPECIAL_QP firmware command.
		 * NOSLEEP_SPIN is required because ts_spec_qplock is held
		 * across the command.
		 */
		if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
			status = tavor_conf_special_qp_cmd_post(state,
			    state->ts_spec_qp0->tr_indx, TAVOR_CMD_QP_SMI,
			    TAVOR_CMD_NOSLEEP_SPIN);
			if (status != TAVOR_CMD_SUCCESS) {
				mutex_exit(&state->ts_spec_qplock);
				cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
				    "command failed: %08x\n", status);
				TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
				return (IBT_INSUFF_RESOURCE);
			}
		}

		/*
		 * Now check (and, if necessary, modify) the flags to indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
		if (flags & mask) {
			mutex_exit(&state->ts_spec_qplock);
			TNF_PROBE_1(tavor_ts_spec_qp0_alloc_already,
			    TAVOR_TNF_ERROR, "", tnf_uint, port, port);
			TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
			return (IBT_QP_IN_USE);
		}
		state->ts_spec_qpflags |= mask;
		*qp_rsrc = state->ts_spec_qp0;

	} else {
		/*
		 * If this is the first QP1 allocation, then post
		 * a CONF_SPECIAL_QP firmware command (again, NOSLEEP_SPIN
		 * because the lock is held)
		 */
		if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
			status = tavor_conf_special_qp_cmd_post(state,
			    state->ts_spec_qp1->tr_indx, TAVOR_CMD_QP_GSI,
			    TAVOR_CMD_NOSLEEP_SPIN);
			if (status != TAVOR_CMD_SUCCESS) {
				mutex_exit(&state->ts_spec_qplock);
				cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
				    "command failed: %08x\n", status);
				TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
				return (IBT_INSUFF_RESOURCE);
			}
		}

		/*
		 * Now check (and, if necessary, modify) the flags to indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
		if (flags & mask) {
			mutex_exit(&state->ts_spec_qplock);
			TNF_PROBE_0(tavor_ts_spec_qp1_alloc_already,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
			return (IBT_QP_IN_USE);
		}
		state->ts_spec_qpflags |= mask;
		*qp_rsrc = state->ts_spec_qp1;
	}

	mutex_exit(&state->ts_spec_qplock);
	TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
	return (DDI_SUCCESS);
}
1903 
1904 
1905 /*
1906  * tavor_special_qp_rsrc_free
1907  *    Context: Can be called from interrupt or base context.
1908  */
static int
tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
    uint_t port)
{
	uint_t		mask, flags;
	int		status;

	TAVOR_TNF_ENTER(tavor_special_qp_rsrc_free);

	/*
	 * Release "port"'s claim on the shared special QP resource (QP0 or
	 * QP1) by clearing its bit in ts_spec_qpflags.  When the last port
	 * has released the resource, an "unconfigure" CONF_SPECIAL_QP
	 * command (QPC index 0) is posted to firmware.  NOSLEEP_SPIN is
	 * required because ts_spec_qplock is held across the command.
	 */
	mutex_enter(&state->ts_spec_qplock);
	if (type == IBT_SMI_SQP) {
		mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
		state->ts_spec_qpflags &= ~mask;
		flags = state->ts_spec_qpflags;

		/*
		 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
		 * firmware command
		 */
		if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
			status = tavor_conf_special_qp_cmd_post(state, 0,
			    TAVOR_CMD_QP_SMI, TAVOR_CMD_NOSLEEP_SPIN);
			if (status != TAVOR_CMD_SUCCESS) {
				mutex_exit(&state->ts_spec_qplock);
				cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
				    "command failed: %08x\n", status);
				TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
				return (ibc_get_ci_failure(0));
			}
		}
	} else {
		mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
		state->ts_spec_qpflags &= ~mask;
		flags = state->ts_spec_qpflags;

		/*
		 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
		 * firmware command
		 */
		if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
			status = tavor_conf_special_qp_cmd_post(state, 0,
			    TAVOR_CMD_QP_GSI, TAVOR_CMD_NOSLEEP_SPIN);
			if (status != TAVOR_CMD_SUCCESS) {
				mutex_exit(&state->ts_spec_qplock);
				cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
				    "command failed: %08x\n", status);
				TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
				return (ibc_get_ci_failure(0));
			}
		}
	}

	mutex_exit(&state->ts_spec_qplock);
	TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
	return (DDI_SUCCESS);
}
1971 
1972 
1973 /*
1974  * tavor_qp_sgl_to_logwqesz()
1975  *    Context: Can be called from interrupt or base context.
1976  */
1977 static void
1978 tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
1979     tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
1980 {
1981         uint_t  max_size, log2, actual_sgl;
1982 
1983         TAVOR_TNF_ENTER(tavor_qp_sgl_to_logwqesz);
1984 
1985         switch (wq_type) {
1986         case TAVOR_QP_WQ_TYPE_SENDQ:
1987                 /*
1988                  * Use requested maximum SGL to calculate max descriptor size
1989                  * (while guaranteeing that the descriptor size is a
1990                  * power-of-2 cachelines).
1991                  */
1992                 max_size = (TAVOR_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
1993                 log2 = highbit(max_size);
1994                 if ((max_size & (max_size - 1)) == 0) {
1995                         log2 = log2 - 1;
1996                 }
1997 
1998                 /* Make sure descriptor is at least the minimum size */
1999                 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2000 
2001                 /* Calculate actual number of SGL (given WQE size) */
2002                 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_SND_HDRS) >> 4;
2003                 break;
2004 
2005         case TAVOR_QP_WQ_TYPE_RECVQ:
2006                 /*
2007                  * Same as above (except for Recv WQEs)
2008                  */
2009                 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2010                 log2 = highbit(max_size);
2011                 if ((max_size & (max_size - 1)) == 0) {
2012                         log2 = log2 - 1;
2013                 }
2014 
2015                 /* Make sure descriptor is at least the minimum size */
2016                 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2017 
2018                 /* Calculate actual number of SGL (given WQE size) */
2019                 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
2020                 break;
2021 
2022         case TAVOR_QP_WQ_TYPE_SENDMLX_QP0:
2023                 /*
2024                  * Same as above (except for MLX transport WQEs).  For these
2025                  * WQEs we have to account for the space consumed by the
2026                  * "inline" packet headers.  (This is smaller than for QP1
2027                  * below because QP0 is not allowed to send packets with a GRH.
2028                  */
2029                 max_size = (TAVOR_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2030                 log2 = highbit(max_size);
2031                 if ((max_size & (max_size - 1)) == 0) {
2032                         log2 = log2 - 1;
2033                 }
2034 
2035                 /* Make sure descriptor is at least the minimum size */
2036                 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2037 
2038                 /* Calculate actual number of SGL (given WQE size) */
2039                 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP0_HDRS) >> 4;
2040                 break;
2041 
2042         case TAVOR_QP_WQ_TYPE_SENDMLX_QP1:
2043                 /*
2044                  * Same as above.  For these WQEs we again have to account for
2045                  * the space consumed by the "inline" packet headers.  (This
2046                  * is larger than for QP0 above because we have to account for
2047                  * the possibility of a GRH in each packet - and this
2048                  * introduces an alignment issue that causes us to consume
2049                  * an additional 8 bytes).
2050                  */
2051                 max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2052                 log2 = highbit(max_size);
2053                 if ((max_size & (max_size - 1)) == 0) {
2054                         log2 = log2 - 1;
2055                 }
2056 
2057                 /* Make sure descriptor is at least the minimum size */
2058                 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2059 
2060                 /* Calculate actual number of SGL (given WQE size) */
2061                 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
2062                 break;
2063 
2064         default:
2065                 TAVOR_WARNING(state, "unexpected work queue type");
2066                 TNF_PROBE_0(tavor_qp_sgl_to_logwqesz_inv_wqtype_fail,
2067                     TAVOR_TNF_ERROR, "");
2068                 break;
2069         }
2070 
2071         /* Fill in the return values */
2072         *logwqesz = log2;
2073         *max_sgl  = min(state->ts_cfg_profile->cp_wqe_real_max_sgl, actual_sgl);
2074 
2075         TAVOR_TNF_EXIT(tavor_qp_sgl_to_logwqesz);
2076 }