1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/socket.h>
  27 #include <sys/ddi.h>
  28 #include <sys/sunddi.h>
  29 #include <sys/tsol/tndb.h>
  30 #include <sys/tsol/tnet.h>
  31 
  32 #include <netinet/in.h>
  33 #include <netinet/ip6.h>
  34 
  35 #include <inet/common.h>
  36 #include <inet/ip.h>
  37 #include <inet/ip6.h>
  38 #include <inet/ipclassifier.h>
  39 #include <inet/ipsec_impl.h>
  40 #include <inet/ipp_common.h>
  41 #include <inet/sctp_ip.h>
  42 
  43 #include "sctp_impl.h"
  44 #include "sctp_addr.h"
  45 
/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() rounds a setting that is not a power of 2 up to one,
 * and raises any setting smaller than this default back to the default.
 */
#define SCTP_CONN_HASH_SIZE     8192

/* Requested number of conn fanout buckets; copied into each stack at init. */
uint_t          sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
  50 
/*
 * Cluster networking hook for traversing current assoc list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
/* Worker for cl_sctp_walk_list() that walks a single sctp_stack_t. */
static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
    void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
  60 
  61 void
  62 sctp_hash_init(sctp_stack_t *sctps)
  63 {
  64         int i;
  65 
  66         /* Start with /etc/system value */
  67         sctps->sctps_conn_hash_size = sctp_conn_hash_size;
  68 
  69         if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
  70                 /* Not a power of two. Round up to nearest power of two */
  71                 for (i = 0; i < 31; i++) {
  72                         if (sctps->sctps_conn_hash_size < (1 << i))
  73                                 break;
  74                 }
  75                 sctps->sctps_conn_hash_size = 1 << i;
  76         }
  77         if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
  78                 sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
  79                 cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
  80                     sctps->sctps_conn_hash_size);
  81         }
  82         sctps->sctps_conn_fanout =
  83             (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
  84             sizeof (sctp_tf_t), KM_SLEEP);
  85         for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
  86                 mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
  87                     MUTEX_DEFAULT, NULL);
  88         }
  89         sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
  90             sizeof (sctp_tf_t), KM_SLEEP);
  91         for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
  92                 mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
  93                     MUTEX_DEFAULT, NULL);
  94         }
  95         sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
  96             sizeof (sctp_tf_t), KM_SLEEP);
  97         for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
  98                 mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
  99                     MUTEX_DEFAULT, NULL);
 100         }
 101 }
 102 
 103 void
 104 sctp_hash_destroy(sctp_stack_t *sctps)
 105 {
 106         int i;
 107 
 108         for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
 109                 mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
 110         }
 111         kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
 112             sizeof (sctp_tf_t));
 113         sctps->sctps_conn_fanout = NULL;
 114 
 115         for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
 116                 mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
 117         }
 118         kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
 119             sizeof (sctp_tf_t));
 120         sctps->sctps_listen_fanout = NULL;
 121 
 122         for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
 123                 mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
 124         }
 125         kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
 126             sizeof (sctp_tf_t));
 127         sctps->sctps_bind_fanout = NULL;
 128 }
 129 
 130 /*
 131  * Exported routine for extracting active SCTP associations.
 132  * Like TCP, we terminate the walk if the callback returns non-zero.
 133  *
 134  * Need to walk all sctp_stack_t instances since this clustering
 135  * interface is assumed global for all instances
 136  */
 137 int
 138 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
 139     void *arg, boolean_t cansleep)
 140 {
 141         netstack_handle_t nh;
 142         netstack_t *ns;
 143         int ret = 0;
 144 
 145         netstack_next_init(&nh);
 146         while ((ns = netstack_next(&nh)) != NULL) {
 147                 ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
 148                     ns->netstack_sctp);
 149                 netstack_rele(ns);
 150         }
 151         netstack_next_fini(&nh);
 152         return (ret);
 153 }
 154 
 155 static int
 156 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
 157     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
 158 {
 159         sctp_t          *sctp;
 160         sctp_t          *sctp_prev;
 161         cl_sctp_info_t  cl_sctpi;
 162         uchar_t         *slist;
 163         uchar_t         *flist;
 164 
 165         sctp_prev = NULL;
 166         mutex_enter(&sctps->sctps_g_lock);
 167         sctp = list_head(&sctps->sctps_g_list);
 168         while (sctp != NULL) {
 169                 size_t  ssize;
 170                 size_t  fsize;
 171 
 172                 mutex_enter(&sctp->sctp_reflock);
 173                 if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
 174                         mutex_exit(&sctp->sctp_reflock);
 175                         sctp = list_next(&sctps->sctps_g_list, sctp);
 176                         continue;
 177                 }
 178                 sctp->sctp_refcnt++;
 179                 mutex_exit(&sctp->sctp_reflock);
 180                 mutex_exit(&sctps->sctps_g_lock);
 181                 if (sctp_prev != NULL)
 182                         SCTP_REFRELE(sctp_prev);
 183                 RUN_SCTP(sctp);
 184                 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
 185                 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
 186 
 187                 slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
 188                 flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
 189                 if (slist == NULL || flist == NULL) {
 190                         WAKE_SCTP(sctp);
 191                         if (slist != NULL)
 192                                 kmem_free(slist, ssize);
 193                         if (flist != NULL)
 194                                 kmem_free(flist, fsize);
 195                         SCTP_REFRELE(sctp);
 196                         return (1);
 197                 }
 198                 cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
 199                 sctp_get_saddr_list(sctp, slist, ssize);
 200                 sctp_get_faddr_list(sctp, flist, fsize);
 201                 cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
 202                 cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
 203                 cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
 204                 if (cl_sctpi.cl_sctpi_family == AF_INET)
 205                         cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
 206                 else
 207                         cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
 208                 cl_sctpi.cl_sctpi_state = sctp->sctp_state;
 209                 cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
 210                 cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
 211                 cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
 212                 WAKE_SCTP(sctp);
 213                 cl_sctpi.cl_sctpi_laddrp = slist;
 214                 cl_sctpi.cl_sctpi_faddrp = flist;
 215                 if ((*cl_callback)(&cl_sctpi, arg) != 0) {
 216                         kmem_free(slist, ssize);
 217                         kmem_free(flist, fsize);
 218                         SCTP_REFRELE(sctp);
 219                         return (1);
 220                 }
 221                 /* list will be freed by cl_callback */
 222                 sctp_prev = sctp;
 223                 mutex_enter(&sctps->sctps_g_lock);
 224                 sctp = list_next(&sctps->sctps_g_list, sctp);
 225         }
 226         mutex_exit(&sctps->sctps_g_lock);
 227         if (sctp_prev != NULL)
 228                 SCTP_REFRELE(sctp_prev);
 229         return (0);
 230 }
 231 
 232 sctp_t *
 233 sctp_conn_match(in6_addr_t **faddrpp, uint32_t nfaddr, in6_addr_t *laddr,
 234     uint32_t ports, zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
 235 {
 236         sctp_tf_t               *tf;
 237         sctp_t                  *sctp;
 238         sctp_faddr_t            *fp;
 239         conn_t                  *connp;
 240         in6_addr_t              **faddrs, **endaddrs = &faddrpp[nfaddr];
 241 
 242         tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
 243         mutex_enter(&tf->tf_lock);
 244 
 245         for (sctp = tf->tf_sctp; sctp != NULL; sctp =
 246             sctp->sctp_conn_hash_next) {
 247                 connp = sctp->sctp_connp;
 248                 if (ports != connp->conn_ports)
 249                         continue;
 250                 if (!(connp->conn_zoneid == zoneid ||
 251                     connp->conn_allzones ||
 252                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
 253                     (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
 254                     (iraflags & IRAF_TX_SHARED_ADDR))))
 255                         continue;
 256 
 257                 /* check for faddr match */
 258                 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
 259                         for (faddrs = faddrpp; faddrs < endaddrs; faddrs++) {
 260                                 if (IN6_ARE_ADDR_EQUAL(*faddrs,
 261                                     &fp->sf_faddr)) {
 262                                         /* check for laddr match */
 263                                         if (sctp_saddr_lookup(sctp, laddr, 0)
 264                                             != NULL) {
 265                                                 SCTP_REFHOLD(sctp);
 266                                                 mutex_exit(&tf->tf_lock);
 267                                                 return (sctp);
 268                                         }
 269                                 }
 270                         }
 271                 }
 272 
 273                 /* no match; continue to the next in the chain */
 274         }
 275 
 276         mutex_exit(&tf->tf_lock);
 277         return (sctp);
 278 }
 279 
 280 static sctp_t *
 281 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
 282     iaflags_t iraflags, sctp_stack_t *sctps)
 283 {
 284         sctp_t                  *sctp;
 285         sctp_tf_t               *tf;
 286         uint16_t                lport;
 287         conn_t                  *connp;
 288 
 289         lport = ((uint16_t *)&ports)[1];
 290 
 291         tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
 292         mutex_enter(&tf->tf_lock);
 293 
 294         for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
 295                 connp = sctp->sctp_connp;
 296                 if (lport != connp->conn_lport)
 297                         continue;
 298 
 299                 if (!(connp->conn_zoneid == zoneid ||
 300                     connp->conn_allzones ||
 301                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
 302                     (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
 303                     (iraflags & IRAF_TX_SHARED_ADDR))))
 304                         continue;
 305 
 306                 if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
 307                         SCTP_REFHOLD(sctp);
 308                         goto done;
 309                 }
 310                 /* no match; continue to the next in the chain */
 311         }
 312 
 313 done:
 314         mutex_exit(&tf->tf_lock);
 315         return (sctp);
 316 }
 317 
 318 /* called by ipsec_sctp_pol */
 319 conn_t *
 320 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
 321     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
 322 {
 323         sctp_t *sctp;
 324 
 325         sctp = sctp_conn_match(&src, 1, dst, ports, zoneid, iraflags, sctps);
 326         if (sctp == NULL) {
 327                 /* Not in conn fanout; check listen fanout */
 328                 sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
 329                 if (sctp == NULL)
 330                         return (NULL);
 331         }
 332         return (sctp->sctp_connp);
 333 }
 334 
 335 /*
 336  * This is called from sctp_fanout() with IP header src & dst addresses.
 337  * First call sctp_conn_match() to get a match by passing in src & dst
 338  * addresses from IP header.
 339  * However sctp_conn_match() can return no match under condition such as :
 340  * A host can send an INIT ACK from a different address than the INIT was sent
 341  * to (in a multi-homed env).
 342  * According to RFC4960, a host can send additional addresses in an INIT
 343  * ACK chunk.
 344  * Therefore extract all addresses from the INIT ACK chunk, pass to
 345  * sctp_conn_match() to get a match.
 346  */
 347 static sctp_t *
 348 sctp_lookup_by_faddrs(mblk_t *mp, sctp_hdr_t *sctph, in6_addr_t *srcp,
 349     in6_addr_t *dstp, uint32_t ports, zoneid_t zoneid, sctp_stack_t *sctps,
 350     iaflags_t iraflags)
 351 {
 352         sctp_t                  *sctp;
 353         sctp_chunk_hdr_t        *ich;
 354         sctp_init_chunk_t       *iack;
 355         sctp_parm_hdr_t         *ph;
 356         ssize_t                 mlen, remaining;
 357         uint16_t                param_type, addr_len = PARM_ADDR4_LEN;
 358         in6_addr_t              src;
 359         in6_addr_t              **addrbuf = NULL, **faddrpp = NULL;
 360         boolean_t               isv4;
 361         uint32_t                totaddr, nfaddr = 0;
 362 
 363         /*
 364          * If we get a match with the passed-in IP header src & dst addresses,
 365          * quickly return the matched sctp.
 366          */
 367         if ((sctp = sctp_conn_match(&srcp, 1, dstp, ports, zoneid, iraflags,
 368             sctps)) != NULL) {
 369                 return (sctp);
 370         }
 371 
 372         /*
 373          * Currently sctph is set to NULL in icmp error fanout case
 374          * (ip_fanout_sctp()).
 375          * The above sctp_conn_match() should handle that, otherwise
 376          * return no match found.
 377          */
 378         if (sctph == NULL)
 379                 return (NULL);
 380 
 381         /*
 382          * Do a pullup again in case the previous one was partially successful,
 383          * so try to complete the pullup here and have a single contiguous
 384          * chunk for processing of entire INIT ACK chunk below.
 385          */
 386         if (mp->b_cont != NULL) {
 387                 if (pullupmsg(mp, -1) == 0) {
 388                         return (NULL);
 389                 }
 390         }
 391 
 392         mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
 393         if ((ich = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
 394                 return (NULL);
 395         }
 396 
 397         if (ich->sch_id == CHUNK_INIT_ACK) {
 398                 remaining = ntohs(ich->sch_len) - sizeof (*ich) -
 399                     sizeof (*iack);
 400                 if (remaining < sizeof (*ph)) {
 401                         return (NULL);
 402                 }
 403 
 404                 isv4 = (iraflags & IRAF_IS_IPV4) ? B_TRUE : B_FALSE;
 405                 if (!isv4)
 406                         addr_len = PARM_ADDR6_LEN;
 407                 totaddr = remaining/addr_len;
 408 
 409                 iack = (sctp_init_chunk_t *)(ich + 1);
 410                 ph = (sctp_parm_hdr_t *)(iack + 1);
 411 
 412                 addrbuf = (in6_addr_t **)
 413                     kmem_zalloc(totaddr * sizeof (in6_addr_t *), KM_NOSLEEP);
 414                 if (addrbuf == NULL)
 415                         return (NULL);
 416                 faddrpp = addrbuf;
 417 
 418                 while (ph != NULL) {
 419                         /*
 420                          * According to RFC4960 :
 421                          * All integer fields in an SCTP packet MUST be
 422                          * transmitted in network byte order,
 423                          * unless otherwise stated.
 424                          * Therefore convert the param type to host byte order.
 425                          * Also do not add src address present in IP header
 426                          * as it has already been thru sctp_conn_match() above.
 427                          */
 428                         param_type = ntohs(ph->sph_type);
 429                         switch (param_type) {
 430                         case PARM_ADDR4:
 431                                 IN6_INADDR_TO_V4MAPPED((struct in_addr *)
 432                                     (ph + 1), &src);
 433                                 if (IN6_ARE_ADDR_EQUAL(&src, srcp))
 434                                         break;
 435                                 *faddrpp = (in6_addr_t *)
 436                                     kmem_zalloc(sizeof (in6_addr_t),
 437                                     KM_NOSLEEP);
 438                                 if (*faddrpp == NULL)
 439                                         break;
 440                                 IN6_INADDR_TO_V4MAPPED((struct in_addr *)
 441                                     (ph + 1), *faddrpp);
 442                                 nfaddr++;
 443                                 faddrpp++;
 444                                 break;
 445                         case PARM_ADDR6:
 446                                 *faddrpp = (in6_addr_t *)(ph + 1);
 447                                 if (IN6_ARE_ADDR_EQUAL(*faddrpp, srcp))
 448                                         break;
 449                                 nfaddr++;
 450                                 faddrpp++;
 451                                 break;
 452                         default:
 453                                 break;
 454                         }
 455                         ph = sctp_next_parm(ph, &remaining);
 456                 }
 457 
 458                 ASSERT(nfaddr < totaddr);
 459 
 460                 if (nfaddr > 0) {
 461                         sctp = sctp_conn_match(addrbuf, nfaddr, dstp, ports,
 462                             zoneid, iraflags, sctps);
 463 
 464                         if (isv4) {
 465                                 for (faddrpp = addrbuf; nfaddr > 0;
 466                                     faddrpp++, nfaddr--) {
 467                                         if (IN6_IS_ADDR_V4MAPPED(*faddrpp)) {
 468                                                 kmem_free(*faddrpp,
 469                                                     sizeof (in6_addr_t));
 470                                         }
 471                                 }
 472                         }
 473                 }
 474                 kmem_free(addrbuf, totaddr * sizeof (in6_addr_t *));
 475         }
 476         return (sctp);
 477 }
 478 
 479 /*
 480  * Fanout to a sctp instance.
 481  */
 482 conn_t *
 483 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
 484     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps, sctp_hdr_t *sctph)
 485 {
 486         zoneid_t zoneid = ira->ira_zoneid;
 487         iaflags_t iraflags = ira->ira_flags;
 488         sctp_t *sctp;
 489 
 490         sctp = sctp_lookup_by_faddrs(mp, sctph, src, dst, ports, zoneid,
 491             sctps, iraflags);
 492         if (sctp == NULL) {
 493                 /* Not in conn fanout; check listen fanout */
 494                 sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
 495                 if (sctp == NULL)
 496                         return (NULL);
 497                 /*
 498                  * On systems running trusted extensions, check if dst
 499                  * should accept the packet. "IPV6_VERSION" indicates
 500                  * that dst is in 16 byte AF_INET6 format. IPv4-mapped
 501                  * IPv6 addresses are supported.
 502                  */
 503                 if ((iraflags & IRAF_SYSTEM_LABELED) &&
 504                     !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
 505                     sctp->sctp_connp)) {
 506                         DTRACE_PROBE3(
 507                             tx__ip__log__info__classify__sctp,
 508                             char *,
 509                             "connp(1) could not receive mp(2)",
 510                             conn_t *, sctp->sctp_connp, mblk_t *, mp);
 511                         SCTP_REFRELE(sctp);
 512                         return (NULL);
 513                 }
 514         }
 515         /*
 516          * For labeled systems, there's no need to check the
 517          * label here.  It's known to be good as we checked
 518          * before allowing the connection to become bound.
 519          */
 520         return (sctp->sctp_connp);
 521 }
 522 
 523 /*
 524  * Fanout for ICMP errors for SCTP
 525  * The caller puts <fport, lport> in the ports parameter.
 526  */
 527 void
 528 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
 529     ip_recv_attr_t *ira)
 530 {
 531         sctp_t          *sctp;
 532         conn_t          *connp;
 533         in6_addr_t      map_src, map_dst;
 534         in6_addr_t      *src, *dst;
 535         boolean_t       secure;
 536         ill_t           *ill = ira->ira_ill;
 537         ip_stack_t      *ipst = ill->ill_ipst;
 538         netstack_t      *ns = ipst->ips_netstack;
 539         ipsec_stack_t   *ipss = ns->netstack_ipsec;
 540         sctp_stack_t    *sctps = ns->netstack_sctp;
 541         iaflags_t       iraflags = ira->ira_flags;
 542         ill_t           *rill = ira->ira_rill;
 543 
 544         ASSERT(iraflags & IRAF_ICMP_ERROR);
 545 
 546         secure = iraflags & IRAF_IPSEC_SECURE;
 547 
 548         /* Assume IP provides aligned packets - otherwise toss */
 549         if (!OK_32PTR(mp->b_rptr)) {
 550                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 551                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 552                 freemsg(mp);
 553                 return;
 554         }
 555 
 556         if (!(iraflags & IRAF_IS_IPV4)) {
 557                 src = &ip6h->ip6_src;
 558                 dst = &ip6h->ip6_dst;
 559         } else {
 560                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
 561                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
 562                 src = &map_src;
 563                 dst = &map_dst;
 564         }
 565         connp = sctp_fanout(src, dst, ports, ira, mp, sctps, NULL);
 566         if (connp == NULL) {
 567                 ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
 568                 return;
 569         }
 570         sctp = CONN2SCTP(connp);
 571 
 572         /*
 573          * We check some fields in conn_t without holding a lock.
 574          * This should be fine.
 575          */
 576         if (((iraflags & IRAF_IS_IPV4) ?
 577             CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
 578             CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
 579             secure) {
 580                 mp = ipsec_check_inbound_policy(mp, connp, ipha,
 581                     ip6h, ira);
 582                 if (mp == NULL) {
 583                         SCTP_REFRELE(sctp);
 584                         return;
 585                 }
 586         }
 587 
 588         ira->ira_ill = ira->ira_rill = NULL;
 589 
 590         mutex_enter(&sctp->sctp_lock);
 591         if (sctp->sctp_running) {
 592                 sctp_add_recvq(sctp, mp, B_FALSE, ira);
 593                 mutex_exit(&sctp->sctp_lock);
 594         } else {
 595                 sctp->sctp_running = B_TRUE;
 596                 mutex_exit(&sctp->sctp_lock);
 597 
 598                 mutex_enter(&sctp->sctp_recvq_lock);
 599                 if (sctp->sctp_recvq != NULL) {
 600                         sctp_add_recvq(sctp, mp, B_TRUE, ira);
 601                         mutex_exit(&sctp->sctp_recvq_lock);
 602                         WAKE_SCTP(sctp);
 603                 } else {
 604                         mutex_exit(&sctp->sctp_recvq_lock);
 605                         if (ira->ira_flags & IRAF_ICMP_ERROR) {
 606                                 sctp_icmp_error(sctp, mp);
 607                         } else {
 608                                 sctp_input_data(sctp, mp, ira);
 609                         }
 610                         WAKE_SCTP(sctp);
 611                 }
 612         }
 613         SCTP_REFRELE(sctp);
 614         ira->ira_ill = ill;
 615         ira->ira_rill = rill;
 616 }
 617 
 618 void
 619 sctp_conn_hash_remove(sctp_t *sctp)
 620 {
 621         sctp_tf_t *tf = sctp->sctp_conn_tfp;
 622 
 623         if (!tf) {
 624                 return;
 625         }
 626         /*
 627          * On a clustered note send this notification to the clustering
 628          * subsystem.
 629          */
 630         if (cl_sctp_disconnect != NULL) {
 631                 (*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
 632                     (cl_sctp_handle_t)sctp);
 633         }
 634 
 635         mutex_enter(&tf->tf_lock);
 636         ASSERT(tf->tf_sctp);
 637         if (tf->tf_sctp == sctp) {
 638                 tf->tf_sctp = sctp->sctp_conn_hash_next;
 639                 if (sctp->sctp_conn_hash_next) {
 640                         ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
 641                         tf->tf_sctp->sctp_conn_hash_prev = NULL;
 642                 }
 643         } else {
 644                 ASSERT(sctp->sctp_conn_hash_prev);
 645                 ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
 646                 sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
 647                     sctp->sctp_conn_hash_next;
 648 
 649                 if (sctp->sctp_conn_hash_next) {
 650                         ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
 651                             == sctp);
 652                         sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
 653                             sctp->sctp_conn_hash_prev;
 654                 }
 655         }
 656         sctp->sctp_conn_hash_next = NULL;
 657         sctp->sctp_conn_hash_prev = NULL;
 658         sctp->sctp_conn_tfp = NULL;
 659         mutex_exit(&tf->tf_lock);
 660 }
 661 
 662 void
 663 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
 664 {
 665         if (sctp->sctp_conn_tfp) {
 666                 sctp_conn_hash_remove(sctp);
 667         }
 668 
 669         if (!caller_holds_lock) {
 670                 mutex_enter(&tf->tf_lock);
 671         } else {
 672                 ASSERT(MUTEX_HELD(&tf->tf_lock));
 673         }
 674 
 675         sctp->sctp_conn_hash_next = tf->tf_sctp;
 676         if (tf->tf_sctp) {
 677                 tf->tf_sctp->sctp_conn_hash_prev = sctp;
 678         }
 679         sctp->sctp_conn_hash_prev = NULL;
 680         tf->tf_sctp = sctp;
 681         sctp->sctp_conn_tfp = tf;
 682         if (!caller_holds_lock) {
 683                 mutex_exit(&tf->tf_lock);
 684         }
 685 }
 686 
 687 void
 688 sctp_listen_hash_remove(sctp_t *sctp)
 689 {
 690         sctp_tf_t *tf = sctp->sctp_listen_tfp;
 691         conn_t  *connp = sctp->sctp_connp;
 692 
 693         if (!tf) {
 694                 return;
 695         }
 696         /*
 697          * On a clustered note send this notification to the clustering
 698          * subsystem.
 699          */
 700         if (cl_sctp_unlisten != NULL) {
 701                 uchar_t *slist;
 702                 ssize_t ssize;
 703 
 704                 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
 705                 slist = kmem_alloc(ssize, KM_SLEEP);
 706                 sctp_get_saddr_list(sctp, slist, ssize);
 707                 (*cl_sctp_unlisten)(connp->conn_family, slist,
 708                     sctp->sctp_nsaddrs, connp->conn_lport);
 709                 /* list will be freed by the clustering module */
 710         }
 711 
 712         mutex_enter(&tf->tf_lock);
 713         ASSERT(tf->tf_sctp);
 714         if (tf->tf_sctp == sctp) {
 715                 tf->tf_sctp = sctp->sctp_listen_hash_next;
 716                 if (sctp->sctp_listen_hash_next != NULL) {
 717                         ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
 718                         tf->tf_sctp->sctp_listen_hash_prev = NULL;
 719                 }
 720         } else {
 721                 ASSERT(sctp->sctp_listen_hash_prev);
 722                 ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
 723                     sctp);
 724                 ASSERT(sctp->sctp_listen_hash_next == NULL ||
 725                     sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
 726 
 727                 sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
 728                     sctp->sctp_listen_hash_next;
 729 
 730                 if (sctp->sctp_listen_hash_next != NULL) {
 731                         sctp_t *next = sctp->sctp_listen_hash_next;
 732 
 733                         ASSERT(next->sctp_listen_hash_prev == sctp);
 734                         next->sctp_listen_hash_prev =
 735                             sctp->sctp_listen_hash_prev;
 736                 }
 737         }
 738         sctp->sctp_listen_hash_next = NULL;
 739         sctp->sctp_listen_hash_prev = NULL;
 740         sctp->sctp_listen_tfp = NULL;
 741         mutex_exit(&tf->tf_lock);
 742 }
 743 
 744 void
 745 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
 746 {
 747         conn_t  *connp = sctp->sctp_connp;
 748 
 749         if (sctp->sctp_listen_tfp) {
 750                 sctp_listen_hash_remove(sctp);
 751         }
 752 
 753         mutex_enter(&tf->tf_lock);
 754         sctp->sctp_listen_hash_next = tf->tf_sctp;
 755         if (tf->tf_sctp) {
 756                 tf->tf_sctp->sctp_listen_hash_prev = sctp;
 757         }
 758         sctp->sctp_listen_hash_prev = NULL;
 759         tf->tf_sctp = sctp;
 760         sctp->sctp_listen_tfp = tf;
 761         mutex_exit(&tf->tf_lock);
 762         /*
 763          * On a clustered note send this notification to the clustering
 764          * subsystem.
 765          */
 766         if (cl_sctp_listen != NULL) {
 767                 uchar_t *slist;
 768                 ssize_t ssize;
 769 
 770                 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
 771                 slist = kmem_alloc(ssize, KM_SLEEP);
 772                 sctp_get_saddr_list(sctp, slist, ssize);
 773                 (*cl_sctp_listen)(connp->conn_family, slist,
 774                     sctp->sctp_nsaddrs, connp->conn_lport);
 775                 /* list will be freed by the clustering module */
 776         }
 777 }
 778 
 779 /*
 780  * Hash list insertion routine for sctp_t structures.
 781  * Inserts entries with the ones bound to a specific IP address first
 782  * followed by those bound to INADDR_ANY.
 783  */
 784 void
 785 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
 786 {
 787         sctp_t  **sctpp;
 788         sctp_t  *sctpnext;
 789 
 790         if (sctp->sctp_ptpbhn != NULL) {
 791                 ASSERT(!caller_holds_lock);
 792                 sctp_bind_hash_remove(sctp);
 793         }
 794         sctpp = &tbf->tf_sctp;
 795         if (!caller_holds_lock) {
 796                 mutex_enter(&tbf->tf_lock);
 797         } else {
 798                 ASSERT(MUTEX_HELD(&tbf->tf_lock));
 799         }
 800         sctpnext = sctpp[0];
 801         if (sctpnext) {
 802                 sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
 803         }
 804         sctp->sctp_bind_hash = sctpnext;
 805         sctp->sctp_ptpbhn = sctpp;
 806         sctpp[0] = sctp;
 807         /* For sctp_*_hash_remove */
 808         sctp->sctp_bind_lockp = &tbf->tf_lock;
 809         if (!caller_holds_lock)
 810                 mutex_exit(&tbf->tf_lock);
 811 }
 812 
 813 /*
 814  * Hash list removal routine for sctp_t structures.
 815  */
 816 void
 817 sctp_bind_hash_remove(sctp_t *sctp)
 818 {
 819         sctp_t  *sctpnext;
 820         kmutex_t *lockp;
 821 
 822         lockp = sctp->sctp_bind_lockp;
 823 
 824         if (sctp->sctp_ptpbhn == NULL)
 825                 return;
 826 
 827         ASSERT(lockp != NULL);
 828         mutex_enter(lockp);
 829         if (sctp->sctp_ptpbhn) {
 830                 sctpnext = sctp->sctp_bind_hash;
 831                 if (sctpnext) {
 832                         sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
 833                         sctp->sctp_bind_hash = NULL;
 834                 }
 835                 *sctp->sctp_ptpbhn = sctpnext;
 836                 sctp->sctp_ptpbhn = NULL;
 837         }
 838         mutex_exit(lockp);
 839         sctp->sctp_bind_lockp = NULL;
 840 }
 841 
 842 /*
 843  * Similar to but different from sctp_conn_match().
 844  *
 845  * Matches sets of addresses as follows: if the argument addr set is
 846  * a complete subset of the corresponding addr set in the sctp_t, it
 847  * is a match.
 848  *
 849  * Caller must hold tf->tf_lock.
 850  *
 851  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
 852  */
 853 sctp_t *
 854 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
 855     int min_state)
 856 {
 857         sctp_t *sctp;
 858         sctp_faddr_t *fp;
 859 
 860         ASSERT(MUTEX_HELD(&tf->tf_lock));
 861 
 862         for (sctp = tf->tf_sctp; sctp != NULL;
 863             sctp = sctp->sctp_conn_hash_next) {
 864                 if (*ports != sctp->sctp_connp->conn_ports ||
 865                     sctp->sctp_state < min_state) {
 866                         continue;
 867                 }
 868 
 869                 /* check for faddr match */
 870                 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
 871                         if (IN6_ARE_ADDR_EQUAL(faddr, &fp->sf_faddr)) {
 872                                 break;
 873                         }
 874                 }
 875 
 876                 if (fp == NULL) {
 877                         /* no faddr match; keep looking */
 878                         continue;
 879                 }
 880 
 881                 /*
 882                  * There is an existing association with the same peer
 883                  * address.  So now we need to check if our local address
 884                  * set overlaps with the one of the existing association.
 885                  * If they overlap, we should return it.
 886                  */
 887                 if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
 888                         goto done;
 889                 }
 890 
 891                 /* no match; continue searching */
 892         }
 893 
 894 done:
 895         if (sctp != NULL) {
 896                 SCTP_REFHOLD(sctp);
 897         }
 898         return (sctp);
 899 }