1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
#include <sys/byteorder.h>
#include <sys/sysmacros.h>
#include <sys/ib/clients/rds/rds.h>
#include <sys/ib/clients/rds/rds_kstat.h>

#include <inet/ipclassifier.h>
  31 
  32 struct rds_kstat_s rds_kstat = {
  33         {"rds_nports",                  KSTAT_DATA_ULONG},
  34         {"rds_nsessions",               KSTAT_DATA_ULONG},
  35         {"rds_tx_bytes",                KSTAT_DATA_ULONG},
  36         {"rds_tx_pkts",                 KSTAT_DATA_ULONG},
  37         {"rds_tx_errors",               KSTAT_DATA_ULONG},
  38         {"rds_rx_bytes",                KSTAT_DATA_ULONG},
  39         {"rds_rx_pkts",                 KSTAT_DATA_ULONG},
  40         {"rds_rx_pkts_pending",         KSTAT_DATA_ULONG},
  41         {"rds_rx_errors",               KSTAT_DATA_ULONG},
  42         {"rds_tx_acks",                 KSTAT_DATA_ULONG},
  43         {"rds_post_recv_buf_called",    KSTAT_DATA_ULONG},
  44         {"rds_stalls_triggered",        KSTAT_DATA_ULONG},
  45         {"rds_stalls_sent",             KSTAT_DATA_ULONG},
  46         {"rds_unstalls_triggered",      KSTAT_DATA_ULONG},
  47         {"rds_unstalls_sent",           KSTAT_DATA_ULONG},
  48         {"rds_stalls_recvd",            KSTAT_DATA_ULONG},
  49         {"rds_unstalls_recvd",          KSTAT_DATA_ULONG},
  50         {"rds_stalls_ignored",          KSTAT_DATA_ULONG},
  51         {"rds_enobufs",                 KSTAT_DATA_ULONG},
  52         {"rds_ewouldblocks",            KSTAT_DATA_ULONG},
  53         {"rds_failovers",               KSTAT_DATA_ULONG},
  54         {"rds_port_quota",              KSTAT_DATA_ULONG},
  55         {"rds_port_quota_adjusted",     KSTAT_DATA_ULONG},
  56 };
  57 
  58 kstat_t *rds_kstatsp;
  59 static kmutex_t rds_kstat_mutex;
  60 
  61 
  62 struct  kmem_cache      *rds_alloc_cache;
  63 
  64 uint_t  rds_bind_fanout_size = RDS_BIND_FANOUT_SIZE;
  65 rds_bf_t *rds_bind_fanout;
  66 
  67 void
  68 rds_increment_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
  69 {
  70         if (lock)
  71                 mutex_enter(&rds_kstat_mutex);
  72         ksnp->value.ul += num;
  73         if (lock)
  74                 mutex_exit(&rds_kstat_mutex);
  75 }
  76 
  77 void
  78 rds_decrement_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
  79 {
  80         if (lock)
  81                 mutex_enter(&rds_kstat_mutex);
  82         ksnp->value.ul -= num;
  83         if (lock)
  84                 mutex_exit(&rds_kstat_mutex);
  85 }
  86 
  87 void
  88 rds_set_kstat(kstat_named_t *ksnp, boolean_t lock, ulong_t num)
  89 {
  90         if (lock)
  91                 mutex_enter(&rds_kstat_mutex);
  92         ksnp->value.ul = num;
  93         if (lock)
  94                 mutex_exit(&rds_kstat_mutex);
  95 }
  96 
  97 ulong_t
  98 rds_get_kstat(kstat_named_t *ksnp, boolean_t lock)
  99 {
 100         ulong_t value;
 101 
 102         if (lock)
 103                 mutex_enter(&rds_kstat_mutex);
 104         value = ksnp->value.ul;
 105         if (lock)
 106                 mutex_exit(&rds_kstat_mutex);
 107 
 108         return (value);
 109 }
 110 
 111 
 112 void
 113 rds_fini()
 114 {
 115         int     i;
 116 
 117         for (i = 0; i < rds_bind_fanout_size; i++) {
 118                 mutex_destroy(&rds_bind_fanout[i].rds_bf_lock);
 119         }
 120         kmem_free(rds_bind_fanout, rds_bind_fanout_size * sizeof (rds_bf_t));
 121 
 122         kmem_cache_destroy(rds_alloc_cache);
 123         kstat_delete(rds_kstatsp);
 124 }
 125 
 126 
 127 void
 128 rds_init()
 129 {
 130         rds_alloc_cache = kmem_cache_create("rds_alloc_cache",
 131             sizeof (rds_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 132         rds_hash_init();
 133         /*
 134          * kstats
 135          */
 136         rds_kstatsp = kstat_create("rds", 0,
 137             "rds_kstat", "misc", KSTAT_TYPE_NAMED,
 138             sizeof (rds_kstat) / sizeof (kstat_named_t),
 139             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
 140         if (rds_kstatsp != NULL) {
 141                 rds_kstatsp->ks_lock = &rds_kstat_mutex;
 142                 rds_kstatsp->ks_data = (void *)&rds_kstat;
 143                 kstat_install(rds_kstatsp);
 144         }
 145 }
 146 
 147 #define UINT_32_BITS 31
 148 void
 149 rds_hash_init()
 150 {
 151         int i;
 152 
 153         if (!ISP2(rds_bind_fanout_size)) {
 154                 /* Not a power of two. Round up to nearest power of two */
 155                 for (i = 0; i < UINT_32_BITS; i++) {
 156                         if (rds_bind_fanout_size < (1 << i))
 157                                 break;
 158                 }
 159                 rds_bind_fanout_size = 1 << i;
 160         }
 161         rds_bind_fanout = kmem_zalloc(rds_bind_fanout_size *
 162             sizeof (rds_bf_t), KM_SLEEP);
 163         for (i = 0; i < rds_bind_fanout_size; i++) {
 164                 mutex_init(&rds_bind_fanout[i].rds_bf_lock, NULL, MUTEX_DEFAULT,
 165                     NULL);
 166         }
 167 }
 168 
 169 void
 170 rds_free(rds_t *rds)
 171 {
 172         ASSERT(rds->rds_refcnt == 0);
 173         ASSERT(MUTEX_HELD(&rds->rds_lock));
 174         crfree(rds->rds_cred);
 175         kmem_cache_free(rds_alloc_cache, rds);
 176 }
 177 
 178 rds_t *
 179 rds_create(void *rds_ulpd, cred_t *credp)
 180 {
 181         rds_t   *rds;
 182 
 183         /* User must supply a credential. */
 184         if (credp == NULL)
 185                 return (NULL);
 186         rds = kmem_cache_alloc(rds_alloc_cache, KM_SLEEP);
 187         if (rds == NULL) {
 188                 return (NULL);
 189         }
 190 
 191         bzero(rds, sizeof (rds_t));
 192         mutex_init(&rds->rds_lock, NULL, MUTEX_DEFAULT, NULL);
 193         cv_init(&rds->rds_refcv, NULL, CV_DEFAULT, NULL);
 194         rds->rds_cred = credp;
 195         rds->rds_ulpd = rds_ulpd;
 196         rds->rds_zoneid = getzoneid();
 197         crhold(credp);
 198         rds->rds_refcnt++;
 199         return (rds);
 200 }
 201 
 202 
 203 /*
 204  * Hash list removal routine for rds_t structures.
 205  */
 206 void
 207 rds_bind_hash_remove(rds_t *rds, boolean_t caller_holds_lock)
 208 {
 209         rds_t   *rdsnext;
 210         kmutex_t *lockp;
 211 
 212         if (rds->rds_ptpbhn == NULL)
 213                 return;
 214 
 215         /*
 216          * Extract the lock pointer in case there are concurrent
 217          * hash_remove's for this instance.
 218          */
 219         ASSERT(rds->rds_port != 0);
 220         if (!caller_holds_lock) {
 221                 lockp = &rds_bind_fanout[RDS_BIND_HASH(rds->rds_port)].
 222                     rds_bf_lock;
 223                 ASSERT(lockp != NULL);
 224                 mutex_enter(lockp);
 225         }
 226 
 227         if (rds->rds_ptpbhn != NULL) {
 228                 rdsnext = rds->rds_bind_hash;
 229                 if (rdsnext != NULL) {
 230                         rdsnext->rds_ptpbhn = rds->rds_ptpbhn;
 231                         rds->rds_bind_hash = NULL;
 232                 }
 233                 *rds->rds_ptpbhn = rdsnext;
 234                 rds->rds_ptpbhn = NULL;
 235         }
 236 
 237         RDS_DEC_REF_CNT(rds);
 238 
 239         if (!caller_holds_lock) {
 240                 mutex_exit(lockp);
 241         }
 242 }
 243 
 244 void
 245 rds_bind_hash_insert(rds_bf_t *rdsbf, rds_t *rds)
 246 {
 247         rds_t   **rdsp;
 248         rds_t   *rdsnext;
 249 
 250         ASSERT(MUTEX_HELD(&rdsbf->rds_bf_lock));
 251         if (rds->rds_ptpbhn != NULL) {
 252                 rds_bind_hash_remove(rds, B_TRUE);
 253         }
 254 
 255         rdsp = &rdsbf->rds_bf_rds;
 256         rdsnext = rdsp[0];
 257 
 258         if (rdsnext != NULL) {
 259                 rdsnext->rds_ptpbhn = &rds->rds_bind_hash;
 260         }
 261         rds->rds_bind_hash = rdsnext;
 262         rds->rds_ptpbhn = rdsp;
 263         rdsp[0] = rds;
 264         RDS_INCR_REF_CNT(rds);
 265 
 266 }
 267 
 268 /*
 269  * Everything is in network byte order
 270  */
 271 /* ARGSUSED */
 272 rds_t *
 273 rds_fanout(ipaddr_t local_addr, ipaddr_t rem_addr,
 274     in_port_t local_port, in_port_t rem_port, zoneid_t zoneid)
 275 {
 276         rds_t   *rds;
 277         rds_bf_t *rdsbf;
 278 
 279         rdsbf = &rds_bind_fanout[RDS_BIND_HASH(local_port)];
 280         mutex_enter(&rdsbf->rds_bf_lock);
 281         rds = rdsbf->rds_bf_rds;
 282         while (rds != NULL) {
 283                 if (!(rds->rds_flags & RDS_CLOSING)) {
 284                         if ((RDS_MATCH(rds, local_port, local_addr)) &&
 285                             ((local_addr != INADDR_LOOPBACK) ||
 286                             (rds->rds_zoneid == zoneid))) {
 287                                 RDS_INCR_REF_CNT(rds);
 288                                 break;
 289                         }
 290                 }
 291                 rds = rds->rds_bind_hash;
 292         }
 293         mutex_exit(&rdsbf->rds_bf_lock);
 294         return (rds);
 295 }
 296 
 297 boolean_t
 298 rds_islocal(ipaddr_t addr)
 299 {
 300         ip_stack_t *ipst;
 301 
 302         ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
 303         ASSERT(ipst != NULL);
 304         if (ip_laddr_verify_v4(addr, ALL_ZONES, ipst, B_FALSE) == IPVL_BAD) {
 305                 netstack_rele(ipst->ips_netstack);
 306                 return (B_FALSE);
 307         }
 308         netstack_rele(ipst->ips_netstack);
 309         return (B_TRUE);
 310 }