1 /*
   2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
   3  *
   4  * See the IPFILTER.LICENCE file for details on licencing.
   5  *
   6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   7  */
   8 
   9 #if !defined(lint)
  10 static const char sccsid[] = "@(#)ip_fil_solaris.c      1.7 07/22/06 (C) 1993-2000 Darren Reed";
  11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
  12 #endif
  13 
  14 #include <sys/types.h>
  15 #include <sys/errno.h>
  16 #include <sys/param.h>
  17 #include <sys/cpuvar.h>
  18 #include <sys/open.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/filio.h>
  21 #include <sys/systm.h>
  22 #include <sys/strsubr.h>
  23 #include <sys/cred.h>
  24 #include <sys/ddi.h>
  25 #include <sys/sunddi.h>
  26 #include <sys/ksynch.h>
  27 #include <sys/kmem.h>
  28 #include <sys/mkdev.h>
  29 #include <sys/protosw.h>
  30 #include <sys/socket.h>
  31 #include <sys/dditypes.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/zone.h>
  34 #include <net/if.h>
  35 #include <net/af.h>
  36 #include <net/route.h>
  37 #include <netinet/in.h>
  38 #include <netinet/in_systm.h>
  39 #include <netinet/ip.h>
  40 #include <netinet/ip_var.h>
  41 #include <netinet/tcp.h>
  42 #include <netinet/udp.h>
  43 #include <netinet/tcpip.h>
  44 #include <netinet/ip_icmp.h>
  45 #include "netinet/ip_compat.h"
  46 #ifdef  USE_INET6
  47 # include <netinet/icmp6.h>
  48 #endif
  49 #include "netinet/ip_fil.h"
  50 #include "netinet/ip_nat.h"
  51 #include "netinet/ip_frag.h"
  52 #include "netinet/ip_state.h"
  53 #include "netinet/ip_auth.h"
  54 #include "netinet/ip_proxy.h"
  55 #include "netinet/ipf_stack.h"
  56 #ifdef  IPFILTER_LOOKUP
  57 # include "netinet/ip_lookup.h"
  58 #endif
  59 #include <inet/ip_ire.h>
  60 
  61 #include <sys/md5.h>
  62 #include <sys/neti.h>
  63 
   64 static  int     frzerostats __P((caddr_t, ipf_stack_t *));
     /*
      * NOTE(review): the ioctl handler in this file calls fr_zerostats(), not
      * frzerostats() - confirm whether the prototype above is still needed.
      */
   65 static  int     fr_setipfloopback __P((int, ipf_stack_t *));
   66 static  int     fr_enableipf __P((ipf_stack_t *, int));
   67 static  int     fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
     /*
      * Functions used with the net hook framework.  The nic_event, *_in, *_out
      * and *_loop_* variants are the ones handed to HOOK_INIT() in iplattach().
      */
   68 static  int     ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
   69 static  int     ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
   70 static  int     ipf_hook __P((hook_data_t, int, int, void *));
   71 static  int     ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
   72 static  int     ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
   73 static  int     ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
   74     void *));
   75 static  int     ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
   76 static  int     ipf_hook4 __P((hook_data_t, int, int, void *));
   77 static  int     ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
   78 static  int     ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
   79 static  int     ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
   80     void *));
   81 static  int     ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
   82     void *));
   83 static  int     ipf_hook6 __P((hook_data_t, int, int, void *));
     /*
      * Iterator entry points with external linkage.
      * NOTE(review): the ioctl handler calls ipf_genericiter(), while the name
      * declared here is ipf_geniter() - confirm which declaration is intended.
      */
   84 extern  int     ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
   85 extern  int     ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
  86 
   87 #if SOLARIS2 < 10
     /*
      * Pointers to tunables of the IP stack, built only for Solaris releases
      * before 10.  They start out NULL.  iplattach() points ip_forwarding at
      * ip_g_forward (Solaris 8 and later) or at an ip_param_arr[] entry, and
      * finds the others by name in ip_param_arr[] on Solaris 8 and earlier.
      * iplattach() and ipldetach() write through the forwarding pointers when
      * ifs_fr_control_forwarding requests it.  The pointed-to type differs
      * between releases, hence the variants below.
      */
   88 #if SOLARIS2 >= 7
   89 u_int           *ip_ttl_ptr = NULL;
   90 u_int           *ip_mtudisc = NULL;
   91 # if SOLARIS2 >= 8
   92 int             *ip_forwarding = NULL;
   93 u_int           *ip6_forwarding = NULL;
   94 # else
   95 u_int           *ip_forwarding = NULL;
   96 # endif
   97 #else
   98 u_long          *ip_ttl_ptr = NULL;
   99 u_long          *ip_mtudisc = NULL;
  100 u_long          *ip_forwarding = NULL;
  101 #endif
  102 #endif
 103 
 104 
 105 /* ------------------------------------------------------------------------ */
 106 /* Function:    ipldetach                                                   */
 107 /* Returns:     int - 0 == success, else error.                             */
 108 /* Parameters:  Nil                                                         */
 109 /*                                                                          */
 110 /* This function is responsible for undoing anything that might have been   */
 111 /* done in a call to iplattach().  It must be able to clean up from a call  */
 112 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
 113 /* configures a table to be so large that we cannot allocate enough memory  */
 114 /* for it.                                                                  */
 115 /* ------------------------------------------------------------------------ */
 116 int ipldetach(ifs)
 117 ipf_stack_t *ifs;
 118 {
 119 
 120         ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
 121 
 122 #if SOLARIS2 < 10
 123 
 124         if (ifs->ifs_fr_control_forwarding & 2) {
 125                 if (ip_forwarding != NULL)
 126                         *ip_forwarding = 0;
 127 #if SOLARIS2 >= 8
 128                 if (ip6_forwarding != NULL)
 129                         *ip6_forwarding = 0;
 130 #endif
 131         }
 132 #endif
 133 
 134         /*
 135          * This lock needs to be dropped around the net_hook_unregister calls
 136          * because we can deadlock here with:
 137          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 138          * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
 139          */
 140         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 141 
 142 #define UNDO_HOOK(_f, _b, _e, _h)                                       \
 143         do {                                                            \
 144                 if (ifs->_f != NULL) {                                       \
 145                         if (ifs->_b) {                                       \
 146                                 ifs->_b = (net_hook_unregister(ifs->_f,   \
 147                                            _e, ifs->_h) != 0);               \
 148                                 if (!ifs->_b) {                              \
 149                                         hook_free(ifs->_h);          \
 150                                         ifs->_h = NULL;                      \
 151                                 }                                       \
 152                         } else if (ifs->_h != NULL) {                        \
 153                                 hook_free(ifs->_h);                  \
 154                                 ifs->_h = NULL;                              \
 155                         }                                               \
 156                 }                                                       \
 157                 _NOTE(CONSTCOND)                                        \
 158         } while (0)
 159 
 160         /*
 161          * Remove IPv6 Hooks
 162          */
 163         if (ifs->ifs_ipf_ipv6 != NULL) {
 164                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
 165                           NH_PHYSICAL_IN, ifs_ipfhook6_in);
 166                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
 167                           NH_PHYSICAL_OUT, ifs_ipfhook6_out);
 168                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
 169                           NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
 170                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
 171                           NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
 172                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
 173                           NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
 174 
 175                 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
 176                         goto detach_failed;
 177                 ifs->ifs_ipf_ipv6 = NULL;
 178         }
 179 
 180         /*
 181          * Remove IPv4 Hooks
 182          */
 183         if (ifs->ifs_ipf_ipv4 != NULL) {
 184                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
 185                           NH_PHYSICAL_IN, ifs_ipfhook4_in);
 186                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
 187                           NH_PHYSICAL_OUT, ifs_ipfhook4_out);
 188                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
 189                           NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
 190                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
 191                           NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
 192                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
 193                           NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
 194 
 195                 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
 196                         goto detach_failed;
 197                 ifs->ifs_ipf_ipv4 = NULL;
 198         }
 199 
 200 #undef UNDO_HOOK
 201 
 202 #ifdef  IPFDEBUG
 203         cmn_err(CE_CONT, "ipldetach()\n");
 204 #endif
 205 
 206         WRITE_ENTER(&ifs->ifs_ipf_global);
 207         fr_deinitialise(ifs);
 208 
 209         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
 210         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
 211 
 212         if (ifs->ifs_ipf_locks_done == 1) {
 213                 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
 214                 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
 215                 RW_DESTROY(&ifs->ifs_ipf_tokens);
 216                 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
 217                 ifs->ifs_ipf_locks_done = 0;
 218         }
 219 
 220         if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
 221             ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
 222             ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
 223             ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
 224             ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
 225                 return -1;
 226 
 227         return 0;
 228 
 229 detach_failed:
 230         WRITE_ENTER(&ifs->ifs_ipf_global);
 231         return -1;
 232 }
 233 
 234 int iplattach(ifs)
 235 ipf_stack_t *ifs;
 236 {
 237 #if SOLARIS2 < 10
 238         int i;
 239 #endif
 240         netid_t id = ifs->ifs_netid;
 241 
 242 #ifdef  IPFDEBUG
 243         cmn_err(CE_CONT, "iplattach()\n");
 244 #endif
 245 
 246         ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
 247         ifs->ifs_fr_flags = IPF_LOGGING;
 248 #ifdef _KERNEL
 249         ifs->ifs_fr_update_ipid = 0;
 250 #else
 251         ifs->ifs_fr_update_ipid = 1;
 252 #endif
 253         ifs->ifs_fr_minttl = 4;
 254         ifs->ifs_fr_icmpminfragmtu = 68;
 255 #if defined(IPFILTER_DEFAULT_BLOCK)
 256         ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
 257 #else
 258         ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
 259 #endif
 260 
 261         bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
 262         MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
 263         MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
 264         RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
 265         RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
 266         ifs->ifs_ipf_locks_done = 1;
 267 
 268         if (fr_initialise(ifs) < 0)
 269                 return -1;
 270 
 271         HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
 272                   "ipfilter_hook4_nicevents", ifs);
 273         HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
 274                   "ipfilter_hook4_in", ifs);
 275         HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
 276                   "ipfilter_hook4_out", ifs);
 277         HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
 278                   "ipfilter_hook4_loop_in", ifs);
 279         HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
 280                   "ipfilter_hook4_loop_out", ifs);
 281 
 282         /*
 283          * If we hold this lock over all of the net_hook_register calls, we
 284          * can cause a deadlock to occur with the following lock ordering:
 285          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 286          * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
 287          */
 288         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 289 
 290         /*
 291          * Add IPv4 hooks
 292          */
 293         ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
 294         if (ifs->ifs_ipf_ipv4 == NULL)
 295                 goto hookup_failed;
 296 
 297         ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
 298             NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
 299         if (!ifs->ifs_hook4_nic_events)
 300                 goto hookup_failed;
 301 
 302         ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
 303             NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
 304         if (!ifs->ifs_hook4_physical_in)
 305                 goto hookup_failed;
 306 
 307         ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
 308             NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
 309         if (!ifs->ifs_hook4_physical_out)
 310                 goto hookup_failed;
 311 
 312         if (ifs->ifs_ipf_loopback) {
 313                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 314                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 315                     ifs->ifs_ipfhook4_loop_in) == 0);
 316                 if (!ifs->ifs_hook4_loopback_in)
 317                         goto hookup_failed;
 318 
 319                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 320                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 321                     ifs->ifs_ipfhook4_loop_out) == 0);
 322                 if (!ifs->ifs_hook4_loopback_out)
 323                         goto hookup_failed;
 324         }
 325         /*
 326          * Add IPv6 hooks
 327          */
 328         ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
 329         if (ifs->ifs_ipf_ipv6 == NULL)
 330                 goto hookup_failed;
 331 
 332         HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
 333                   "ipfilter_hook6_nicevents", ifs);
 334         HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
 335                   "ipfilter_hook6_in", ifs);
 336         HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
 337                   "ipfilter_hook6_out", ifs);
 338         HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
 339                   "ipfilter_hook6_loop_in", ifs);
 340         HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
 341                   "ipfilter_hook6_loop_out", ifs);
 342 
 343         ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
 344             NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
 345         if (!ifs->ifs_hook6_nic_events)
 346                 goto hookup_failed;
 347 
 348         ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
 349             NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
 350         if (!ifs->ifs_hook6_physical_in)
 351                 goto hookup_failed;
 352 
 353         ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
 354             NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
 355         if (!ifs->ifs_hook6_physical_out)
 356                 goto hookup_failed;
 357 
 358         if (ifs->ifs_ipf_loopback) {
 359                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 360                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 361                     ifs->ifs_ipfhook6_loop_in) == 0);
 362                 if (!ifs->ifs_hook6_loopback_in)
 363                         goto hookup_failed;
 364 
 365                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 366                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 367                     ifs->ifs_ipfhook6_loop_out) == 0);
 368                 if (!ifs->ifs_hook6_loopback_out)
 369                         goto hookup_failed;
 370         }
 371 
 372         /*
 373          * Reacquire ipf_global, now it is safe.
 374          */
 375         WRITE_ENTER(&ifs->ifs_ipf_global);
 376 
 377 /* Do not use private interface ip_params_arr[] in Solaris 10 */
 378 #if SOLARIS2 < 10
 379 
 380 #if SOLARIS2 >= 8
 381         ip_forwarding = &ip_g_forward;
 382 #endif
 383         /*
 384          * XXX - There is no terminator for this array, so it is not possible
 385          * to tell if what we are looking for is missing and go off the end
 386          * of the array.
 387          */
 388 
 389 #if SOLARIS2 <= 8
 390         for (i = 0; ; i++) {
 391                 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
 392                         ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
 393                 } else if (!strcmp(ip_param_arr[i].ip_param_name,
 394                             "ip_path_mtu_discovery")) {
 395                         ip_mtudisc = &ip_param_arr[i].ip_param_value;
 396                 }
 397 #if SOLARIS2 < 8
 398                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 399                             "ip_forwarding")) {
 400                         ip_forwarding = &ip_param_arr[i].ip_param_value;
 401                 }
 402 #else
 403                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 404                             "ip6_forwarding")) {
 405                         ip6_forwarding = &ip_param_arr[i].ip_param_value;
 406                 }
 407 #endif
 408 
 409                 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
 410 #if SOLARIS2 >= 8
 411                     ip6_forwarding != NULL &&
 412 #endif
 413                     ip_forwarding != NULL)
 414                         break;
 415         }
 416 #endif
 417 
 418         if (ifs->ifs_fr_control_forwarding & 1) {
 419                 if (ip_forwarding != NULL)
 420                         *ip_forwarding = 1;
 421 #if SOLARIS2 >= 8
 422                 if (ip6_forwarding != NULL)
 423                         *ip6_forwarding = 1;
 424 #endif
 425         }
 426 
 427 #endif
 428 
 429         return 0;
 430 hookup_failed:
 431         WRITE_ENTER(&ifs->ifs_ipf_global);
 432         return -1;
 433 }
 434 
 435 static  int     fr_setipfloopback(set, ifs)
 436 int set;
 437 ipf_stack_t *ifs;
 438 {
 439         if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
 440                 return EFAULT;
 441 
 442         if (set && !ifs->ifs_ipf_loopback) {
 443                 ifs->ifs_ipf_loopback = 1;
 444 
 445                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 446                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 447                     ifs->ifs_ipfhook4_loop_in) == 0);
 448                 if (!ifs->ifs_hook4_loopback_in)
 449                         return EINVAL;
 450 
 451                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 452                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 453                     ifs->ifs_ipfhook4_loop_out) == 0);
 454                 if (!ifs->ifs_hook4_loopback_out)
 455                         return EINVAL;
 456 
 457                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 458                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 459                     ifs->ifs_ipfhook6_loop_in) == 0);
 460                 if (!ifs->ifs_hook6_loopback_in)
 461                         return EINVAL;
 462 
 463                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 464                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 465                     ifs->ifs_ipfhook6_loop_out) == 0);
 466                 if (!ifs->ifs_hook6_loopback_out)
 467                         return EINVAL;
 468 
 469         } else if (!set && ifs->ifs_ipf_loopback) {
 470                 ifs->ifs_ipf_loopback = 0;
 471 
 472                 ifs->ifs_hook4_loopback_in =
 473                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 474                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 475                 if (ifs->ifs_hook4_loopback_in)
 476                         return EBUSY;
 477 
 478                 ifs->ifs_hook4_loopback_out =
 479                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 480                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
 481                 if (ifs->ifs_hook4_loopback_out)
 482                         return EBUSY;
 483 
 484                 ifs->ifs_hook6_loopback_in =
 485                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 486                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 487                 if (ifs->ifs_hook6_loopback_in)
 488                         return EBUSY;
 489 
 490                 ifs->ifs_hook6_loopback_out =
 491                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 492                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
 493                 if (ifs->ifs_hook6_loopback_out)
 494                         return EBUSY;
 495         }
 496         return 0;
 497 }
 498 
 499 
 500 /*
 501  * Filter ioctl interface.
 502  */
 503 /*ARGSUSED*/
 504 int iplioctl(dev, cmd, data, mode, cp, rp)
 505 dev_t dev;
 506 int cmd;
 507 #if SOLARIS2 >= 7
 508 intptr_t data;
 509 #else
 510 int *data;
 511 #endif
 512 int mode;
 513 cred_t *cp;
 514 int *rp;
 515 {
 516         int error = 0, tmp;
 517         friostat_t fio;
 518         minor_t unit;
 519         u_int enable;
 520         ipf_stack_t *ifs;
 521 
 522 #ifdef  IPFDEBUG
 523         cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
 524                 dev, cmd, data, mode, cp, rp);
 525 #endif
 526         unit = getminor(dev);
 527         if (IPL_LOGMAX < unit)
 528                 return ENXIO;
 529 
 530         /*
 531          * As we're calling ipf_find_stack in user space, from a given zone
 532          * to find the stack pointer for this zone, there is no need to have
 533          * a hold/refence count here.
 534          */
 535         ifs = ipf_find_stack(crgetzoneid(cp));
 536         ASSERT(ifs != NULL);
 537 
 538         if (ifs->ifs_fr_running <= 0) {
 539                 if (unit != IPL_LOGIPF) {
 540                         return EIO;
 541                 }
 542                 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 543                     cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 544                     cmd != SIOCGETFS && cmd != SIOCGETFF) {
 545                         return EIO;
 546                 }
 547         }
 548 
 549         READ_ENTER(&ifs->ifs_ipf_global);
 550         if (ifs->ifs_fr_enable_active != 0) {
 551                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
 552                 return EBUSY;
 553         }
 554 
 555         error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
 556                                curproc, ifs);
 557         if (error != -1) {
 558                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
 559                 return error;
 560         }
 561         error = 0;
 562 
 563         switch (cmd)
 564         {
 565         case SIOCFRENB :
 566                 if (!(mode & FWRITE))
 567                         error = EPERM;
 568                 else {
 569                         error = COPYIN((caddr_t)data, (caddr_t)&enable,
 570                                        sizeof(enable));
 571                         if (error != 0) {
 572                                 error = EFAULT;
 573                                 break;
 574                         }
 575 
 576                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 577                         WRITE_ENTER(&ifs->ifs_ipf_global);
 578 
 579                         /*
 580                          * We must recheck fr_enable_active here, since we've
 581                          * dropped ifs_ipf_global from R in order to get it
 582                          * exclusively.
 583                          */
 584                         if (ifs->ifs_fr_enable_active == 0) {
 585                                 ifs->ifs_fr_enable_active = 1;
 586                                 error = fr_enableipf(ifs, enable);
 587                                 ifs->ifs_fr_enable_active = 0;
 588                         }
 589                 }
 590                 break;
 591         case SIOCIPFSET :
 592                 if (!(mode & FWRITE)) {
 593                         error = EPERM;
 594                         break;
 595                 }
 596                 /* FALLTHRU */
 597         case SIOCIPFGETNEXT :
 598         case SIOCIPFGET :
 599                 error = fr_ipftune(cmd, (void *)data, ifs);
 600                 break;
 601         case SIOCSETFF :
 602                 if (!(mode & FWRITE))
 603                         error = EPERM;
 604                 else {
 605                         error = COPYIN((caddr_t)data,
 606                                        (caddr_t)&ifs->ifs_fr_flags,
 607                                        sizeof(ifs->ifs_fr_flags));
 608                         if (error != 0)
 609                                 error = EFAULT;
 610                 }
 611                 break;
 612         case SIOCIPFLP :
 613                 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
 614                                sizeof(tmp));
 615                 if (error != 0)
 616                         error = EFAULT;
 617                 else
 618                         error = fr_setipfloopback(tmp, ifs);
 619                 break;
 620         case SIOCGETFF :
 621                 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
 622                                 sizeof(ifs->ifs_fr_flags));
 623                 if (error != 0)
 624                         error = EFAULT;
 625                 break;
 626         case SIOCFUNCL :
 627                 error = fr_resolvefunc((void *)data);
 628                 break;
 629         case SIOCINAFR :
 630         case SIOCRMAFR :
 631         case SIOCADAFR :
 632         case SIOCZRLST :
 633                 if (!(mode & FWRITE))
 634                         error = EPERM;
 635                 else
 636                         error = frrequest(unit, cmd, (caddr_t)data,
 637                                           ifs->ifs_fr_active, 1, ifs);
 638                 break;
 639         case SIOCINIFR :
 640         case SIOCRMIFR :
 641         case SIOCADIFR :
 642                 if (!(mode & FWRITE))
 643                         error = EPERM;
 644                 else
 645                         error = frrequest(unit, cmd, (caddr_t)data,
 646                                           1 - ifs->ifs_fr_active, 1, ifs);
 647                 break;
 648         case SIOCSWAPA :
 649                 if (!(mode & FWRITE))
 650                         error = EPERM;
 651                 else {
 652                         WRITE_ENTER(&ifs->ifs_ipf_mutex);
 653                         bzero((char *)ifs->ifs_frcache,
 654                             sizeof (ifs->ifs_frcache));
 655                         error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
 656                                         (caddr_t)data,
 657                                         sizeof(ifs->ifs_fr_active));
 658                         if (error != 0)
 659                                 error = EFAULT;
 660                         else
 661                                 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
 662                         RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
 663                 }
 664                 break;
 665         case SIOCGETFS :
 666                 fr_getstat(&fio, ifs);
 667                 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
 668                 break;
 669         case SIOCFRZST :
 670                 if (!(mode & FWRITE))
 671                         error = EPERM;
 672                 else
 673                         error = fr_zerostats((caddr_t)data, ifs);
 674                 break;
 675         case    SIOCIPFFL :
 676                 if (!(mode & FWRITE))
 677                         error = EPERM;
 678                 else {
 679                         error = COPYIN((caddr_t)data, (caddr_t)&tmp,
 680                                        sizeof(tmp));
 681                         if (!error) {
 682                                 tmp = frflush(unit, 4, tmp, ifs);
 683                                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
 684                                                 sizeof(tmp));
 685                                 if (error != 0)
 686                                         error = EFAULT;
 687                         } else
 688                                 error = EFAULT;
 689                 }
 690                 break;
 691 #ifdef USE_INET6
 692         case    SIOCIPFL6 :
 693                 if (!(mode & FWRITE))
 694                         error = EPERM;
 695                 else {
 696                         error = COPYIN((caddr_t)data, (caddr_t)&tmp,
 697                                        sizeof(tmp));
 698                         if (!error) {
 699                                 tmp = frflush(unit, 6, tmp, ifs);
 700                                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
 701                                                 sizeof(tmp));
 702                                 if (error != 0)
 703                                         error = EFAULT;
 704                         } else
 705                                 error = EFAULT;
 706                 }
 707                 break;
 708 #endif
 709         case SIOCSTLCK :
 710                 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
 711                 if (error == 0) {
 712                         ifs->ifs_fr_state_lock = tmp;
 713                         ifs->ifs_fr_nat_lock = tmp;
 714                         ifs->ifs_fr_frag_lock = tmp;
 715                         ifs->ifs_fr_auth_lock = tmp;
 716                 } else
 717                         error = EFAULT;
 718         break;
 719 #ifdef  IPFILTER_LOG
 720         case    SIOCIPFFB :
 721                 if (!(mode & FWRITE))
 722                         error = EPERM;
 723                 else {
 724                         tmp = ipflog_clear(unit, ifs);
 725                         error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
 726                                        sizeof(tmp));
 727                         if (error)
 728                                 error = EFAULT;
 729                 }
 730                 break;
 731 #endif /* IPFILTER_LOG */
 732         case SIOCFRSYN :
 733                 if (!(mode & FWRITE))
 734                         error = EPERM;
 735                 else {
 736                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 737                         WRITE_ENTER(&ifs->ifs_ipf_global);
 738 
 739                         frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
 740                         fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
 741                         fr_nataddrsync(0, NULL, NULL, ifs);
 742                         fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
 743                         error = 0;
 744                 }
 745                 break;
 746         case SIOCGFRST :
 747                 error = fr_outobj((void *)data, fr_fragstats(ifs),
 748                                   IPFOBJ_FRAGSTAT);
 749                 break;
 750         case FIONREAD :
 751 #ifdef  IPFILTER_LOG
 752                 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
 753 
 754                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
 755                 if (error != 0)
 756                         error = EFAULT;
 757 #endif
 758                 break;
 759         case SIOCIPFITER :
 760                 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
 761                                        curproc, ifs);
 762                 break;
 763 
 764         case SIOCGENITER :
 765                 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
 766                                         curproc, ifs);
 767                 break;
 768 
 769         case SIOCIPFDELTOK :
 770                 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
 771                 if (error != 0) {
 772                         error = EFAULT;
 773                 } else {
 774                         error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
 775                 }
 776                 break;
 777 
 778         default :
 779 #ifdef  IPFDEBUG
 780                 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
 781                         cmd, (void *)data);
 782 #endif
 783                 error = EINVAL;
 784                 break;
 785         }
 786         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 787         return error;
 788 }
 789 
 790 
 791 static int fr_enableipf(ifs, enable)
 792 ipf_stack_t *ifs;
 793 int enable;
 794 {
 795         int error;
 796 
 797         if (!enable) {
 798                 error = ipldetach(ifs);
 799                 if (error == 0)
 800                         ifs->ifs_fr_running = -1;
 801                 return error;
 802         }
 803 
 804         if (ifs->ifs_fr_running > 0)
 805                 return 0;
 806 
 807         error = iplattach(ifs);
 808         if (error == 0) {
 809                 if (ifs->ifs_fr_timer_id == NULL) {
 810                         int hz = drv_usectohz(500000);
 811 
 812                         ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
 813                                                        (void *)ifs,
 814                                                        hz);
 815                 }
 816                 ifs->ifs_fr_running = 1;
 817         } else {
 818                 (void) ipldetach(ifs);
 819         }
 820         return error;
 821 }
 822 
 823 
 824 phy_if_t get_unit(name, v, ifs)
 825 char *name;
 826 int v;
 827 ipf_stack_t *ifs;
 828 {
 829         net_handle_t nif;
 830  
 831         if (v == 4)
 832                 nif = ifs->ifs_ipf_ipv4;
 833         else if (v == 6)
 834                 nif = ifs->ifs_ipf_ipv6;
 835         else
 836                 return 0;
 837 
 838         return (net_phylookup(nif, name));
 839 }
 840 
 841 /*
 842  * routines below for saving IP headers to buffer
 843  */
 844 /*ARGSUSED*/
 845 int iplopen(devp, flags, otype, cred)
 846 dev_t *devp;
 847 int flags, otype;
 848 cred_t *cred;
 849 {
 850         minor_t min = getminor(*devp);
 851 
 852 #ifdef  IPFDEBUG
 853         cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
 854 #endif
 855         if (!(otype & OTYP_CHR))
 856                 return ENXIO;
 857 
 858         min = (IPL_LOGMAX < min) ? ENXIO : 0;
 859         return min;
 860 }
 861 
 862 
 863 /*ARGSUSED*/
 864 int iplclose(dev, flags, otype, cred)
 865 dev_t dev;
 866 int flags, otype;
 867 cred_t *cred;
 868 {
 869         minor_t min = getminor(dev);
 870 
 871 #ifdef  IPFDEBUG
 872         cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
 873 #endif
 874 
 875         min = (IPL_LOGMAX < min) ? ENXIO : 0;
 876         return min;
 877 }
 878 
 879 #ifdef  IPFILTER_LOG
 880 /*
 881  * iplread/ipllog
 882  * both of these must operate with at least splnet() lest they be
 883  * called during packet processing and cause an inconsistancy to appear in
 884  * the filter lists.
 885  */
 886 /*ARGSUSED*/
 887 int iplread(dev, uio, cp)
 888 dev_t dev;
 889 register struct uio *uio;
 890 cred_t *cp;
 891 {
 892         ipf_stack_t *ifs;
 893         int ret;
 894 
 895         /*
 896          * As we're calling ipf_find_stack in user space, from a given zone
 897          * to find the stack pointer for this zone, there is no need to have
 898          * a hold/refence count here.
 899          */
 900         ifs = ipf_find_stack(crgetzoneid(cp));
 901         ASSERT(ifs != NULL);
 902 
 903 # ifdef IPFDEBUG
 904         cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
 905 # endif
 906 
 907         if (ifs->ifs_fr_running < 1) {
 908                 return EIO;
 909         }
 910 
 911 # ifdef IPFILTER_SYNC
 912         if (getminor(dev) == IPL_LOGSYNC) {
 913                 return ipfsync_read(uio);
 914         }
 915 # endif
 916 
 917         ret = ipflog_read(getminor(dev), uio, ifs);
 918         return ret;
 919 }
 920 #endif /* IPFILTER_LOG */
 921 
 922 
 923 /*
 924  * iplread/ipllog
 925  * both of these must operate with at least splnet() lest they be
 926  * called during packet processing and cause an inconsistancy to appear in
 927  * the filter lists.
 928  */
 929 int iplwrite(dev, uio, cp)
 930 dev_t dev;
 931 register struct uio *uio;
 932 cred_t *cp;
 933 {
 934         ipf_stack_t *ifs;
 935 
 936         /*
 937          * As we're calling ipf_find_stack in user space, from a given zone
 938          * to find the stack pointer for this zone, there is no need to have
 939          * a hold/refence count here.
 940          */
 941         ifs = ipf_find_stack(crgetzoneid(cp));
 942         ASSERT(ifs != NULL);
 943 
 944 #ifdef  IPFDEBUG
 945         cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
 946 #endif
 947 
 948         if (ifs->ifs_fr_running < 1) {
 949                 return EIO;
 950         }
 951 
 952 #ifdef  IPFILTER_SYNC
 953         if (getminor(dev) == IPL_LOGSYNC)
 954                 return ipfsync_write(uio);
 955 #endif /* IPFILTER_SYNC */
 956         dev = dev;      /* LINT */
 957         uio = uio;      /* LINT */
 958         cp = cp;        /* LINT */
 959         return ENXIO;
 960 }
 961 
 962 
 963 /*
 964  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
 965  * requires a large amount of setting up and isn't any more efficient.
 966  */
 967 int fr_send_reset(fin)
 968 fr_info_t *fin;
 969 {
 970         tcphdr_t *tcp, *tcp2;
 971         int tlen, hlen;
 972         mblk_t *m;
 973 #ifdef  USE_INET6
 974         ip6_t *ip6;
 975 #endif
 976         ip_t *ip;
 977 
 978         tcp = fin->fin_dp;
 979         if (tcp->th_flags & TH_RST)
 980                 return -1;
 981 
 982 #ifndef IPFILTER_CKSUM
 983         if (fr_checkl4sum(fin) == -1)
 984                 return -1;
 985 #endif
 986 
 987         tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
 988 #ifdef  USE_INET6
 989         if (fin->fin_v == 6)
 990                 hlen = sizeof(ip6_t);
 991         else
 992 #endif
 993                 hlen = sizeof(ip_t);
 994         hlen += sizeof(*tcp2);
 995         if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
 996                 return -1;
 997 
 998         m->b_rptr += 64;
 999         MTYPE(m) = M_DATA;
1000         m->b_wptr = m->b_rptr + hlen;
1001         ip = (ip_t *)m->b_rptr;
1002         bzero((char *)ip, hlen);
1003         tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1004         tcp2->th_dport = tcp->th_sport;
1005         tcp2->th_sport = tcp->th_dport;
1006         if (tcp->th_flags & TH_ACK) {
1007                 tcp2->th_seq = tcp->th_ack;
1008                 tcp2->th_flags = TH_RST;
1009         } else {
1010                 tcp2->th_ack = ntohl(tcp->th_seq);
1011                 tcp2->th_ack += tlen;
1012                 tcp2->th_ack = htonl(tcp2->th_ack);
1013                 tcp2->th_flags = TH_RST|TH_ACK;
1014         }
1015         tcp2->th_off = sizeof(struct tcphdr) >> 2;
1016 
1017         ip->ip_v = fin->fin_v;
1018 #ifdef  USE_INET6
1019         if (fin->fin_v == 6) {
1020                 ip6 = (ip6_t *)m->b_rptr;
1021                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1022                 ip6->ip6_src = fin->fin_dst6.in6;
1023                 ip6->ip6_dst = fin->fin_src6.in6;
1024                 ip6->ip6_plen = htons(sizeof(*tcp));
1025                 ip6->ip6_nxt = IPPROTO_TCP;
1026                 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1027         } else
1028 #endif
1029         {
1030                 ip->ip_src.s_addr = fin->fin_daddr;
1031                 ip->ip_dst.s_addr = fin->fin_saddr;
1032                 ip->ip_id = fr_nextipid(fin);
1033                 ip->ip_hl = sizeof(*ip) >> 2;
1034                 ip->ip_p = IPPROTO_TCP;
1035                 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1036                 ip->ip_tos = fin->fin_ip->ip_tos;
1037                 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1038         }
1039         return fr_send_ip(fin, m, &m);
1040 }
1041 
1042 /*
1043  * Function:    fr_send_ip
1044  * Returns:      0: success
1045  *              -1: failed
1046  * Parameters:
1047  *      fin: packet information
1048  *      m: the message block where ip head starts
1049  *
1050  * Send a new packet through the IP stack. 
1051  *
1052  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1053  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1054  * function).
1055  *
1056  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1057  * in by this function.
1058  *
1059  * All other portions of the packet must be in on-the-wire format.
1060  */
1061 /*ARGSUSED*/
1062 static int fr_send_ip(fin, m, mpp)
1063 fr_info_t *fin;
1064 mblk_t *m, **mpp;
1065 {
1066         qpktinfo_t qpi, *qpip;
1067         fr_info_t fnew;
1068         ip_t *ip;
1069         int i, hlen;
1070         ipf_stack_t *ifs = fin->fin_ifs;
1071 
1072         ip = (ip_t *)m->b_rptr;
1073         bzero((char *)&fnew, sizeof(fnew));
1074 
1075 #ifdef  USE_INET6
1076         if (fin->fin_v == 6) {
1077                 ip6_t *ip6;
1078 
1079                 ip6 = (ip6_t *)ip;
1080                 ip6->ip6_vfc = 0x60;
1081                 ip6->ip6_hlim = 127;
1082                 fnew.fin_v = 6;
1083                 hlen = sizeof(*ip6);
1084                 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1085         } else
1086 #endif
1087         {
1088                 fnew.fin_v = 4;
1089 #if SOLARIS2 >= 10
1090                 ip->ip_ttl = 255;
1091                 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1092                         ip->ip_off = htons(IP_DF);
1093 #else
1094                 if (ip_ttl_ptr != NULL)
1095                         ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1096                 else
1097                         ip->ip_ttl = 63;
1098                 if (ip_mtudisc != NULL)
1099                         ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1100                 else
1101                         ip->ip_off = htons(IP_DF);
1102 #endif
1103                 /*
1104                  * The dance with byte order and ip_len/ip_off is because in
1105                  * fr_fastroute, it expects them to be in host byte order but
1106                  * ipf_cksum expects them to be in network byte order.
1107                  */
1108                 ip->ip_len = htons(ip->ip_len);
1109                 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1110                 ip->ip_len = ntohs(ip->ip_len);
1111                 ip->ip_off = ntohs(ip->ip_off);
1112                 hlen = sizeof(*ip);
1113                 fnew.fin_plen = ip->ip_len;
1114         }
1115 
1116         qpip = fin->fin_qpi;
1117         qpi.qpi_off = 0;
1118         qpi.qpi_ill = qpip->qpi_ill;
1119         qpi.qpi_m = m;
1120         qpi.qpi_data = ip;
1121         fnew.fin_qpi = &qpi;
1122         fnew.fin_ifp = fin->fin_ifp;
1123         fnew.fin_flx = FI_NOCKSUM;
1124         fnew.fin_m = m;
1125         fnew.fin_qfm = m;
1126         fnew.fin_ip = ip;
1127         fnew.fin_mp = mpp;
1128         fnew.fin_hlen = hlen;
1129         fnew.fin_dp = (char *)ip + hlen;
1130         fnew.fin_ifs = fin->fin_ifs;
1131         (void) fr_makefrip(hlen, ip, &fnew);
1132 
1133         i = fr_fastroute(m, mpp, &fnew, NULL);
1134         return i;
1135 }
1136 
1137 
1138 int fr_send_icmp_err(type, fin, dst)
1139 int type;
1140 fr_info_t *fin;
1141 int dst;
1142 {
1143         struct in_addr dst4;
1144         struct icmp *icmp;
1145         qpktinfo_t *qpi;
1146         int hlen, code;
1147         phy_if_t phy;
1148         u_short sz;
1149 #ifdef  USE_INET6
1150         mblk_t *mb;
1151 #endif
1152         mblk_t *m;
1153 #ifdef  USE_INET6
1154         ip6_t *ip6;
1155 #endif
1156         ip_t *ip;
1157         ipf_stack_t *ifs = fin->fin_ifs;
1158 
1159         if ((type < 0) || (type > ICMP_MAXTYPE))
1160                 return -1;
1161 
1162         code = fin->fin_icode;
1163 #ifdef USE_INET6
1164         if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1165                 return -1;
1166 #endif
1167 
1168 #ifndef IPFILTER_CKSUM
1169         if (fr_checkl4sum(fin) == -1)
1170                 return -1;
1171 #endif
1172 
1173         qpi = fin->fin_qpi;
1174 
1175 #ifdef  USE_INET6
1176         mb = fin->fin_qfm;
1177 
1178         if (fin->fin_v == 6) {
1179                 sz = sizeof(ip6_t);
1180                 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1181                 hlen = sizeof(ip6_t);
1182                 type = icmptoicmp6types[type];
1183                 if (type == ICMP6_DST_UNREACH)
1184                         code = icmptoicmp6unreach[code];
1185         } else
1186 #endif
1187         {
1188                 if ((fin->fin_p == IPPROTO_ICMP) &&
1189                     !(fin->fin_flx & FI_SHORT))
1190                         switch (ntohs(fin->fin_data[0]) >> 8)
1191                         {
1192                         case ICMP_ECHO :
1193                         case ICMP_TSTAMP :
1194                         case ICMP_IREQ :
1195                         case ICMP_MASKREQ :
1196                                 break;
1197                         default :
1198                                 return 0;
1199                         }
1200 
1201                 sz = sizeof(ip_t) * 2;
1202                 sz += 8;                /* 64 bits of data */
1203                 hlen = sizeof(ip_t);
1204         }
1205 
1206         sz += offsetof(struct icmp, icmp_ip);
1207         if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1208                 return -1;
1209         MTYPE(m) = M_DATA;
1210         m->b_rptr += 64;
1211         m->b_wptr = m->b_rptr + sz;
1212         bzero((char *)m->b_rptr, (size_t)sz);
1213         ip = (ip_t *)m->b_rptr;
1214         ip->ip_v = fin->fin_v;
1215         icmp = (struct icmp *)(m->b_rptr + hlen);
1216         icmp->icmp_type = type & 0xff;
1217         icmp->icmp_code = code & 0xff;
1218         phy = (phy_if_t)qpi->qpi_ill; 
1219         if (type == ICMP_UNREACH && (phy != 0) && 
1220             fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1221                 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1222 
1223 #ifdef  USE_INET6
1224         if (fin->fin_v == 6) {
1225                 struct in6_addr dst6;
1226                 int csz;
1227 
1228                 if (dst == 0) {
1229                         ipf_stack_t *ifs = fin->fin_ifs;
1230 
1231                         if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1232                                        (void *)&dst6, NULL, ifs) == -1) {
1233                                 FREE_MB_T(m);
1234                                 return -1;
1235                         }
1236                 } else
1237                         dst6 = fin->fin_dst6.in6;
1238 
1239                 csz = sz;
1240                 sz -= sizeof(ip6_t);
1241                 ip6 = (ip6_t *)m->b_rptr;
1242                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1243                 ip6->ip6_plen = htons((u_short)sz);
1244                 ip6->ip6_nxt = IPPROTO_ICMPV6;
1245                 ip6->ip6_src = dst6;
1246                 ip6->ip6_dst = fin->fin_src6.in6;
1247                 sz -= offsetof(struct icmp, icmp_ip);
1248                 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1249                 icmp->icmp_cksum = csz - sizeof(ip6_t);
1250         } else
1251 #endif
1252         {
1253                 ip->ip_hl = sizeof(*ip) >> 2;
1254                 ip->ip_p = IPPROTO_ICMP;
1255                 ip->ip_id = fin->fin_ip->ip_id;
1256                 ip->ip_tos = fin->fin_ip->ip_tos;
1257                 ip->ip_len = (u_short)sz;
1258                 if (dst == 0) {
1259                         ipf_stack_t *ifs = fin->fin_ifs;
1260 
1261                         if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1262                                        (void *)&dst4, NULL, ifs) == -1) {
1263                                 FREE_MB_T(m);
1264                                 return -1;
1265                         }
1266                 } else {
1267                         dst4 = fin->fin_dst;
1268                 }
1269                 ip->ip_src = dst4;
1270                 ip->ip_dst = fin->fin_src;
1271                 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1272                       sizeof(*fin->fin_ip));
1273                 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1274                       (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1275                 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1276                 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1277                 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1278                                              sz - sizeof(ip_t));
1279         }
1280 
1281         /*
1282          * Need to exit out of these so we don't recursively call rw_enter
1283          * from fr_qout.
1284          */
1285         return fr_send_ip(fin, m, &m);
1286 }
1287 
1288 #include <sys/time.h>
1289 #include <sys/varargs.h>
1290 
1291 #ifndef _KERNEL
1292 #include <stdio.h>
1293 #endif
1294 
1295 /*
1296  * Return the first IP Address associated with an interface
1297  * For IPv6, we walk through the list of logical interfaces and return
1298  * the address of the first one that isn't a link-local interface.
1299  * We can't assume that it is :1 because another link-local address
1300  * may have been assigned there.
1301  */
1302 /*ARGSUSED*/
1303 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1304 int v, atype;
1305 void *ifptr;
1306 struct in_addr  *inp, *inpmask;
1307 ipf_stack_t *ifs;
1308 {
1309         struct sockaddr_in6 v6addr[2];
1310         struct sockaddr_in v4addr[2];
1311         net_ifaddr_t type[2];
1312         net_handle_t net_data;
1313         phy_if_t phyif;
1314         void *array;
1315 
1316         switch (v)
1317         {
1318         case 4:
1319                 net_data = ifs->ifs_ipf_ipv4;
1320                 array = v4addr;
1321                 break;
1322         case 6:
1323                 net_data = ifs->ifs_ipf_ipv6;
1324                 array = v6addr;
1325                 break;
1326         default:
1327                 net_data = NULL;
1328                 break;
1329         }
1330 
1331         if (net_data == NULL)
1332                 return -1;
1333 
1334         phyif = (phy_if_t)ifptr;
1335 
1336         switch (atype)
1337         {
1338         case FRI_PEERADDR :
1339                 type[0] = NA_PEER;
1340                 break;
1341 
1342         case FRI_BROADCAST :
1343                 type[0] = NA_BROADCAST;
1344                 break;
1345 
1346         default :
1347                 type[0] = NA_ADDRESS;
1348                 break;
1349         }
1350 
1351         type[1] = NA_NETMASK;
1352 
1353         if (v == 6) {
1354                 lif_if_t idx = 0;
1355 
1356                 do {
1357                         idx = net_lifgetnext(net_data, phyif, idx);
1358                         if (net_getlifaddr(net_data, phyif, idx, 2, type,
1359                                            array) < 0)
1360                                 return -1;
1361                         if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1362                             !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1363                                 break;
1364                 } while (idx != 0);
1365 
1366                 if (idx == 0)
1367                         return -1;
1368 
1369                 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1370                                         inp, inpmask);
1371         }
1372 
1373         if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1374                 return -1;
1375 
1376         return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1377 }
1378 
1379 
1380 u_32_t fr_newisn(fin)
1381 fr_info_t *fin;
1382 {
1383         static int iss_seq_off = 0;
1384         u_char hash[16];
1385         u_32_t newiss;
1386         MD5_CTX ctx;
1387         ipf_stack_t *ifs = fin->fin_ifs;
1388 
1389         /*
1390          * Compute the base value of the ISS.  It is a hash
1391          * of (saddr, sport, daddr, dport, secret).
1392          */
1393         MD5Init(&ctx);
1394 
1395         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1396                   sizeof(fin->fin_fi.fi_src));
1397         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1398                   sizeof(fin->fin_fi.fi_dst));
1399         MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1400 
1401         MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1402 
1403         MD5Final(hash, &ctx);
1404 
1405         bcopy(hash, &newiss, sizeof(newiss));
1406 
1407         /*
1408          * Now increment our "timer", and add it in to
1409          * the computed value.
1410          *
1411          * XXX Use `addin'?
1412          * XXX TCP_ISSINCR too large to use?
1413          */
1414         iss_seq_off += 0x00010000;
1415         newiss += iss_seq_off;
1416         return newiss;
1417 }
1418 
1419 
1420 /* ------------------------------------------------------------------------ */
1421 /* Function:    fr_nextipid                                                 */
1422 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1423 /* Parameters:  fin(I) - pointer to packet information                      */
1424 /*                                                                          */
1425 /* Returns the next IPv4 ID to use for this packet.                         */
1426 /* ------------------------------------------------------------------------ */
1427 u_short fr_nextipid(fin)
1428 fr_info_t *fin;
1429 {
1430         static u_short ipid = 0;
1431         u_short id;
1432         ipf_stack_t *ifs = fin->fin_ifs;
1433 
1434         MUTEX_ENTER(&ifs->ifs_ipf_rw);
1435         if (fin->fin_pktnum != 0) {
1436                 id = fin->fin_pktnum & 0xffff;
1437         } else {
1438                 id = ipid++;
1439         }
1440         MUTEX_EXIT(&ifs->ifs_ipf_rw);
1441 
1442         return id;
1443 }
1444 
1445 
1446 #ifndef IPFILTER_CKSUM
1447 /* ARGSUSED */
1448 #endif
1449 INLINE void fr_checkv4sum(fin)
1450 fr_info_t *fin;
1451 {
1452 #ifdef IPFILTER_CKSUM
1453         if (fr_checkl4sum(fin) == -1)
1454                 fin->fin_flx |= FI_BAD;
1455 #endif
1456 }
1457 
1458 
#ifdef USE_INET6
/*
 * fr_checkv6sum - IPv6 counterpart of fr_checkv4sum: when built with
 * IPFILTER_CKSUM, run fr_checkl4sum() on the packet and set FI_BAD in
 * fin_flx if it returns -1.  Without IPFILTER_CKSUM this function does
 * nothing.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;

	fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1472 
1473 
1474 #if (SOLARIS2 < 7)
1475 void fr_slowtimer()
1476 #else
1477 /*ARGSUSED*/
1478 void fr_slowtimer __P((void *arg))
1479 #endif
1480 {
1481         ipf_stack_t *ifs = arg;
1482 
1483         READ_ENTER(&ifs->ifs_ipf_global);
1484         if (ifs->ifs_fr_running != 1) {
1485                 ifs->ifs_fr_timer_id = NULL;
1486                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1487                 return;
1488         }
1489         ipf_expiretokens(ifs);
1490         fr_fragexpire(ifs);
1491         fr_timeoutstate(ifs);
1492         fr_natexpire(ifs);
1493         fr_authexpire(ifs);
1494         ifs->ifs_fr_ticks++;
1495         if (ifs->ifs_fr_running == 1)
1496                 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1497                     drv_usectohz(500000));
1498         else
1499                 ifs->ifs_fr_timer_id = NULL;
1500         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1501 }
1502 
1503 
1504 /* ------------------------------------------------------------------------ */
1505 /* Function:    fr_pullup                                                   */
1506 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1507 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1508 /*              fin(I) - pointer to packet information                      */
1509 /*              len(I) - number of bytes to pullup                          */
1510 /*                                                                          */
1511 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1512 /* single buffer for ease of access.  Operating system native functions are */
1513 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1514 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1515 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1516 /* and ONLY if the pullup succeeds.                                         */
1517 /*                                                                          */
1518 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1519 /* of buffers that starts at *fin->fin_mp.                                  */
1520 /* ------------------------------------------------------------------------ */
1521 void *fr_pullup(min, fin, len)
1522 mb_t *min;
1523 fr_info_t *fin;
1524 int len;
1525 {
1526         qpktinfo_t *qpi = fin->fin_qpi;
1527         int out = fin->fin_out, dpoff, ipoff;
1528         mb_t *m = min, *m1, *m2;
1529         char *ip;
1530         uint32_t start, stuff, end, value, flags;
1531         ipf_stack_t *ifs = fin->fin_ifs;
1532 
1533         if (m == NULL)
1534                 return NULL;
1535 
1536         ip = (char *)fin->fin_ip;
1537         if ((fin->fin_flx & FI_COALESCE) != 0)
1538                 return ip;
1539 
1540         ipoff = fin->fin_ipoff;
1541         if (fin->fin_dp != NULL)
1542                 dpoff = (char *)fin->fin_dp - (char *)ip;
1543         else
1544                 dpoff = 0;
1545 
1546         if (M_LEN(m) < len + ipoff) {
1547 
1548                 /*
1549                  * pfil_precheck ensures the IP header is on a 32bit
1550                  * aligned address so simply fail if that isn't currently
1551                  * the case (should never happen).
1552                  */
1553                 int inc = 0;
1554 
1555                 if (ipoff > 0) {
1556                         if ((ipoff & 3) != 0) {
1557                                 inc = 4 - (ipoff & 3);
1558                                 if (m->b_rptr - inc >= m->b_datap->db_base)
1559                                         m->b_rptr -= inc;
1560                                 else
1561                                         inc = 0;
1562                         }
1563                 }
1564 
1565                 /*
1566                  * XXX This is here as a work around for a bug with DEBUG
1567                  * XXX Solaris kernels.  The problem is b_prev is used by IP
1568                  * XXX code as a way to stash the phyint_index for a packet,
1569                  * XXX this doesn't get reset by IP but freeb does an ASSERT()
1570                  * XXX for both of these to be NULL.  See 6442390.
1571                  */
1572                 m1 = m;
1573                 m2 = m->b_prev;
1574 
1575                 do {
1576                         m1->b_next = NULL;
1577                         m1->b_prev = NULL;
1578                         m1 = m1->b_cont;
1579                 } while (m1);
1580 
1581                 /*
1582                  * Need to preserve checksum information by copying them
1583                  * to newmp which heads the pulluped message.
1584                  */
1585                 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1586                     &value, &flags);
1587 
1588                 if (pullupmsg(m, len + ipoff + inc) == 0) {
1589                         ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1590                         FREE_MB_T(*fin->fin_mp);
1591                         *fin->fin_mp = NULL;
1592                         fin->fin_m = NULL;
1593                         fin->fin_ip = NULL;
1594                         fin->fin_dp = NULL;
1595                         qpi->qpi_data = NULL;
1596                         return NULL;
1597                 }
1598 
1599                 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1600                     value, flags, 0);
1601 
1602                 m->b_prev = m2;
1603                 m->b_rptr += inc;
1604                 fin->fin_m = m;
1605                 ip = MTOD(m, char *) + ipoff;
1606                 qpi->qpi_data = ip;
1607         }
1608 
1609         ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1610         fin->fin_ip = (ip_t *)ip;
1611         if (fin->fin_dp != NULL)
1612                 fin->fin_dp = (char *)fin->fin_ip + dpoff;
1613 
1614         if (len == fin->fin_plen)
1615                 fin->fin_flx |= FI_COALESCE;
1616         return ip;
1617 }
1618 
1619 
1620 /*
1621  * Function:    fr_verifysrc
1622  * Returns:     int (really boolean)
1623  * Parameters:  fin - packet information
1624  *
1625  * Check whether the packet has a valid source address for the interface on
1626  * which the packet arrived, implementing the "fr_chksrc" feature.
1627  * Returns true iff the packet's source address is valid.
1628  */
1629 int fr_verifysrc(fin)
1630 fr_info_t *fin;
1631 {
1632         net_handle_t net_data_p;
1633         phy_if_t phy_ifdata_routeto;
1634         struct sockaddr sin;
1635         ipf_stack_t *ifs = fin->fin_ifs;
1636 
1637         if (fin->fin_v == 4) { 
1638                 net_data_p = ifs->ifs_ipf_ipv4;
1639         } else if (fin->fin_v == 6) { 
1640                 net_data_p = ifs->ifs_ipf_ipv6;
1641         } else { 
1642                 return (0); 
1643         }
1644 
1645         /* Get the index corresponding to the if name */
1646         sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1647         bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1648         phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1649 
1650         return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 
1651 }
1652 
1653 
1654 /*
1655  * Function:    fr_fastroute
1656  * Returns:      0: success;
1657  *              -1: failed
1658  * Parameters:
1659  *      mb: the message block where ip head starts
1660  *      mpp: the pointer to the pointer of the orignal
1661  *              packet message
1662  *      fin: packet information
1663  *      fdp: destination interface information
1664  *      if it is NULL, no interface information provided.
1665  *
1666  * This function is for fastroute/to/dup-to rules. It calls
1667  * pfil_make_lay2_packet to search route, make lay-2 header
1668  * ,and identify output queue for the IP packet.
1669  * The destination address depends on the following conditions:
1670  * 1: for fastroute rule, fdp is passed in as NULL, so the
1671  *      destination address is the IP Packet's destination address
1672  * 2: for to/dup-to rule, if an ip address is specified after
1673  *      the interface name, this address is the as destination
1674  *      address. Otherwise IP Packet's destination address is used
1675  */
1676 int fr_fastroute(mb, mpp, fin, fdp)
1677 mblk_t *mb, **mpp;
1678 fr_info_t *fin;
1679 frdest_t *fdp;
1680 {
1681         net_handle_t net_data_p;
1682         net_inject_t *inj;
1683         mblk_t *mp = NULL;
1684         frentry_t *fr = fin->fin_fr;
1685         qpktinfo_t *qpi;
1686         ip_t *ip;
1687 
1688         struct sockaddr_in *sin;
1689         struct sockaddr_in6 *sin6;
1690         struct sockaddr *sinp;
1691         ipf_stack_t *ifs = fin->fin_ifs;
1692 #ifndef sparc
1693         u_short __iplen, __ipoff;
1694 #endif
1695 
1696         if (fin->fin_v == 4) {
1697                 net_data_p = ifs->ifs_ipf_ipv4;
1698         } else if (fin->fin_v == 6) {
1699                 net_data_p = ifs->ifs_ipf_ipv6;
1700         } else {
1701                 return (-1);
1702         }
1703 
1704         inj = net_inject_alloc(NETINFO_VERSION);
1705         if (inj == NULL)
1706                 return -1;
1707 
1708         ip = fin->fin_ip;
1709         qpi = fin->fin_qpi;
1710 
1711         /*
1712          * If this is a duplicate mblk then we want ip to point at that
1713          * data, not the original, if and only if it is already pointing at
1714          * the current mblk data.
1715          *
1716          * Otherwise, if it's not a duplicate, and we're not already pointing
1717          * at the current mblk data, then we want to ensure that the data
1718          * points at ip.
1719          */
1720 
1721         if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1722                 ip = (ip_t *)mb->b_rptr;
1723         } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1724                 qpi->qpi_m->b_rptr = (uchar_t *)ip;
1725                 qpi->qpi_off = 0;
1726         }
1727 
1728         /*
1729          * If there is another M_PROTO, we don't want it
1730          */
1731         if (*mpp != mb) {
1732                 mp = unlinkb(*mpp);
1733                 freeb(*mpp);
1734                 *mpp = mp;
1735         }
1736 
1737         sinp = (struct sockaddr *)&inj->ni_addr;
1738         sin = (struct sockaddr_in *)sinp;
1739         sin6 = (struct sockaddr_in6 *)sinp;
1740         bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1741         inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1742         inj->ni_packet = mb;
1743 
1744         /*
1745          * In case we're here due to "to <if>" being used with
1746          * "keep state", check that we're going in the correct
1747          * direction.
1748          */
1749         if (fdp != NULL) {
1750                 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1751                         (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1752                         goto bad_fastroute;
1753                 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1754                 if (fin->fin_v == 4) {
1755                         sin->sin_addr = fdp->fd_ip;
1756                 } else {
1757                         sin6->sin6_addr = fdp->fd_ip6.in6;
1758                 }
1759         } else {
1760                 if (fin->fin_v == 4) {
1761                         sin->sin_addr = ip->ip_dst;
1762                 } else {
1763                         sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1764                 }
1765                 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1766         }
1767 
1768         /*
1769          * Clear the hardware checksum flags from packets that we are doing
1770          * input processing on as leaving them set will cause the outgoing
1771          * NIC (if it supports hardware checksum) to calculate them anew,
1772          * using the old (correct) checksums as the pseudo value to start
1773          * from.
1774          */
1775         if (fin->fin_out == 0) {
1776                 DB_CKSUMFLAGS(mb) = 0;
1777         }
1778 
1779         *mpp = mb;
1780 
1781         if (fin->fin_out == 0) {
1782                 void *saveifp;
1783                 u_32_t pass;
1784 
1785                 saveifp = fin->fin_ifp;
1786                 fin->fin_ifp = (void *)inj->ni_physical;
1787                 fin->fin_flx &= ~FI_STATE;
1788                 fin->fin_out = 1;
1789                 (void) fr_acctpkt(fin, &pass);
1790                 fin->fin_fr = NULL;
1791                 if (!fr || !(fr->fr_flags & FR_RETMASK))
1792                         (void) fr_checkstate(fin, &pass);
1793                 if (fr_checknatout(fin, NULL) == -1)
1794                         goto bad_fastroute;
1795                 fin->fin_out = 0;
1796                 fin->fin_ifp = saveifp;
1797         }
1798 #ifndef sparc
1799         if (fin->fin_v == 4) {
1800                 __iplen = (u_short)ip->ip_len,
1801                 __ipoff = (u_short)ip->ip_off;
1802 
1803                 ip->ip_len = htons(__iplen);
1804                 ip->ip_off = htons(__ipoff);
1805         }
1806 #endif
1807 
1808         if (net_data_p) {
1809                 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1810                         net_inject_free(inj);
1811                         return (-1);
1812                 }
1813         }
1814 
1815         ifs->ifs_fr_frouteok[0]++;
1816         net_inject_free(inj);
1817         return 0;
1818 bad_fastroute:
1819         net_inject_free(inj);
1820         freemsg(mb);
1821         ifs->ifs_fr_frouteok[1]++;
1822         return -1;
1823 }
1824 
1825 
1826 /* ------------------------------------------------------------------------ */
1827 /* Function:    ipf_hook4_out                                               */
1828 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1829 /* Parameters:  event(I)     - pointer to event                             */
1830 /*              info(I)      - pointer to hook information for firewalling  */
1831 /*                                                                          */
1832 /* Calling ipf_hook.                                                        */
1833 /* ------------------------------------------------------------------------ */
1834 /*ARGSUSED*/
1835 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1836 {
1837         return ipf_hook(info, 1, 0, arg);
1838 }
1839 /*ARGSUSED*/
1840 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1841 {
1842         return ipf_hook6(info, 1, 0, arg);
1843 }
1844 
1845 /* ------------------------------------------------------------------------ */
1846 /* Function:    ipf_hook4_in                                                */
1847 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1848 /* Parameters:  event(I)     - pointer to event                             */
1849 /*              info(I)      - pointer to hook information for firewalling  */
1850 /*                                                                          */
1851 /* Calling ipf_hook.                                                        */
1852 /* ------------------------------------------------------------------------ */
1853 /*ARGSUSED*/
1854 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1855 {
1856         return ipf_hook(info, 0, 0, arg);
1857 }
1858 /*ARGSUSED*/
1859 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1860 {
1861         return ipf_hook6(info, 0, 0, arg);
1862 }
1863 
1864 
1865 /* ------------------------------------------------------------------------ */
1866 /* Function:    ipf_hook4_loop_out                                          */
1867 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1868 /* Parameters:  event(I)     - pointer to event                             */
1869 /*              info(I)      - pointer to hook information for firewalling  */
1870 /*                                                                          */
1871 /* Calling ipf_hook.                                                        */
1872 /* ------------------------------------------------------------------------ */
1873 /*ARGSUSED*/
1874 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1875 {
1876         return ipf_hook(info, 1, FI_NOCKSUM, arg);
1877 }
1878 /*ARGSUSED*/
1879 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1880 {
1881         return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1882 }
1883 
1884 /* ------------------------------------------------------------------------ */
1885 /* Function:    ipf_hook4_loop_in                                           */
1886 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1887 /* Parameters:  event(I)     - pointer to event                             */
1888 /*              info(I)      - pointer to hook information for firewalling  */
1889 /*                                                                          */
1890 /* Calling ipf_hook.                                                        */
1891 /* ------------------------------------------------------------------------ */
1892 /*ARGSUSED*/
1893 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1894 {
1895         return ipf_hook(info, 0, FI_NOCKSUM, arg);
1896 }
1897 /*ARGSUSED*/
1898 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1899 {
1900         return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1901 }
1902 
1903 /* ------------------------------------------------------------------------ */
1904 /* Function:    ipf_hook                                                    */
1905 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1906 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1907 /*              out(I)       - whether packet is going in or out            */
1908 /*              loopback(I)  - whether packet is a loopback packet or not   */
1909 /*                                                                          */
1910 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1911 /* parameters out of the info structure and forms them up to be useful for  */
1912 /* calling ipfilter.                                                        */
1913 /* ------------------------------------------------------------------------ */
1914 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1915 {
1916         hook_pkt_event_t *fw;
1917         ipf_stack_t *ifs;
1918         qpktinfo_t qpi;
1919         int rval, hlen;
1920         u_short swap;
1921         phy_if_t phy; 
1922         ip_t *ip;
1923 
1924         ifs = arg;
1925         fw = (hook_pkt_event_t *)info;
1926 
1927         ASSERT(fw != NULL);
1928         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1929 
1930         ip = fw->hpe_hdr;
1931         swap = ntohs(ip->ip_len);
1932         ip->ip_len = swap;
1933         swap = ntohs(ip->ip_off);
1934         ip->ip_off = swap;
1935         hlen = IPH_HDR_LENGTH(ip);
1936 
1937         qpi.qpi_m = fw->hpe_mb;
1938         qpi.qpi_data = fw->hpe_hdr;
1939         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1940         qpi.qpi_ill = (void *)phy;
1941         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1942         if (qpi.qpi_flags)
1943                 qpi.qpi_flags |= FI_MBCAST;
1944         qpi.qpi_flags |= loopback;
1945 
1946         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1947             &qpi, fw->hpe_mp, ifs);
1948 
1949         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
1950         if (rval == 0 && *(fw->hpe_mp) == NULL)
1951                 rval = 1;
1952 
1953         /* Notify IP the packet mblk_t and IP header pointers. */
1954         fw->hpe_mb = qpi.qpi_m;
1955         fw->hpe_hdr = qpi.qpi_data;
1956         if (rval == 0) {
1957                 ip = qpi.qpi_data;
1958                 swap = ntohs(ip->ip_len);
1959                 ip->ip_len = swap;
1960                 swap = ntohs(ip->ip_off);
1961                 ip->ip_off = swap;
1962         }
1963         return rval;
1964 
1965 }
1966 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
1967 {
1968         hook_pkt_event_t *fw;
1969         int rval, hlen;
1970         qpktinfo_t qpi;
1971         phy_if_t phy; 
1972 
1973         fw = (hook_pkt_event_t *)info;
1974 
1975         ASSERT(fw != NULL);
1976         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1977 
1978         hlen = sizeof (ip6_t);
1979 
1980         qpi.qpi_m = fw->hpe_mb;
1981         qpi.qpi_data = fw->hpe_hdr;
1982         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1983         qpi.qpi_ill = (void *)phy;
1984         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1985         if (qpi.qpi_flags)
1986                 qpi.qpi_flags |= FI_MBCAST;
1987         qpi.qpi_flags |= loopback;
1988 
1989         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1990             &qpi, fw->hpe_mp, arg);
1991 
1992         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
1993         if (rval == 0 && *(fw->hpe_mp) == NULL)
1994                 rval = 1;
1995 
1996         /* Notify IP the packet mblk_t and IP header pointers. */
1997         fw->hpe_mb = qpi.qpi_m;
1998         fw->hpe_hdr = qpi.qpi_data;
1999         return rval;
2000 
2001 }
2002 
2003 
2004 /* ------------------------------------------------------------------------ */
2005 /* Function:    ipf_nic_event_v4                                            */
2006 /* Returns:     int - 0 == no problems encountered                          */
2007 /* Parameters:  event(I)     - pointer to event                             */
2008 /*              info(I)      - pointer to information about a NIC event     */
2009 /*                                                                          */
2010 /* Function to receive asynchronous NIC events from IP                      */
2011 /* ------------------------------------------------------------------------ */
2012 /*ARGSUSED*/
2013 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2014 {
2015         struct sockaddr_in *sin;
2016         hook_nic_event_t *hn;
2017         ipf_stack_t *ifs = arg;
2018         void *new_ifp = NULL;
2019 
2020         if (ifs->ifs_fr_running <= 0)
2021                 return (0);
2022 
2023         hn = (hook_nic_event_t *)info;
2024 
2025         switch (hn->hne_event)
2026         {
2027         case NE_PLUMB :
2028                 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2029                        ifs);
2030                 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2031                               hn->hne_data, ifs);
2032                 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2033                              hn->hne_data, ifs);
2034                 break;
2035 
2036         case NE_UNPLUMB :
2037                 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2038                 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2039                               ifs);
2040                 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2041                 break;
2042 
2043         case NE_ADDRESS_CHANGE :
2044                 /*
2045                  * We only respond to events for logical interface 0 because
2046                  * IPFilter only uses the first address given to a network
2047                  * interface.  We check for hne_lif==1 because the netinfo
2048                  * code maps adds 1 to the lif number so that it can return
2049                  * 0 to indicate "no more lifs" when walking them.
2050                  */
2051                 if (hn->hne_lif == 1) {
2052                         frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2053                             ifs);
2054                         sin = hn->hne_data;
2055                         fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2056                             ifs);
2057                 }
2058                 break;
2059 
2060 #if SOLARIS2 >= 10
2061         case NE_IFINDEX_CHANGE :
2062                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2063 
2064                 if (hn->hne_data != NULL) {
2065                         /*
2066                          * The netinfo passes interface index as int (hne_data should be
2067                          * handled as a pointer to int), which is always 32bit. We need to
2068                          * convert it to void pointer here, since interfaces are
2069                          * represented as pointers to void in IPF. The pointers are 64 bits
2070                          * long on 64bit platforms. Doing something like
2071                          *      (void *)((int) x)
2072                          * will throw warning:
2073                          *   "cast to pointer from integer of different size"
2074                          * during 64bit compilation.
2075                          *
2076                          * The line below uses (size_t) to typecast int to
2077                          * size_t, which might be 64bit/32bit (depending
2078                          * on architecture). Once we have proper 64bit/32bit
2079                          * type (size_t), we can safely convert it to void pointer.
2080                          */
2081                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2082                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2083                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2084                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2085                 }
2086                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2087                 break;
2088 #endif
2089 
2090         default :
2091                 break;
2092         }
2093 
2094         return 0;
2095 }
2096 
2097 
2098 /* ------------------------------------------------------------------------ */
2099 /* Function:    ipf_nic_event_v6                                            */
2100 /* Returns:     int - 0 == no problems encountered                          */
2101 /* Parameters:  event(I)     - pointer to event                             */
2102 /*              info(I)      - pointer to information about a NIC event     */
2103 /*                                                                          */
2104 /* Function to receive asynchronous NIC events from IP                      */
2105 /* ------------------------------------------------------------------------ */
2106 /*ARGSUSED*/
2107 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2108 {
2109         struct sockaddr_in6 *sin6;
2110         hook_nic_event_t *hn;
2111         ipf_stack_t *ifs = arg;
2112         void *new_ifp = NULL;
2113 
2114         if (ifs->ifs_fr_running <= 0)
2115                 return (0);
2116 
2117         hn = (hook_nic_event_t *)info;
2118 
2119         switch (hn->hne_event)
2120         {
2121         case NE_PLUMB :
2122                 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2123                        hn->hne_data, ifs);
2124                 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2125                               hn->hne_data, ifs);
2126                 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2127                              hn->hne_data, ifs);
2128                 break;
2129 
2130         case NE_UNPLUMB :
2131                 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2132                 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2133                               ifs);
2134                 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2135                 break;
2136 
2137         case NE_ADDRESS_CHANGE :
2138                 if (hn->hne_lif == 1) {
2139                         sin6 = hn->hne_data;
2140                         fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2141                                        ifs);
2142                 }
2143                 break;
2144 
2145 #if SOLARIS2 >= 10
2146         case NE_IFINDEX_CHANGE :
2147                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2148                 if (hn->hne_data != NULL) {
2149                         /*
2150                          * The netinfo passes interface index as int (hne_data should be
2151                          * handled as a pointer to int), which is always 32bit. We need to
2152                          * convert it to void pointer here, since interfaces are
2153                          * represented as pointers to void in IPF. The pointers are 64 bits
2154                          * long on 64bit platforms. Doing something like
2155                          *      (void *)((int) x)
2156                          * will throw warning:
2157                          *   "cast to pointer from integer of different size"
2158                          * during 64bit compilation.
2159                          *
2160                          * The line below uses (size_t) to typecast int to
2161                          * size_t, which might be 64bit/32bit (depending
2162                          * on architecture). Once we have proper 64bit/32bit
2163                          * type (size_t), we can safely convert it to void pointer.
2164                          */
2165                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2166                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2167                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2168                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2169                 }
2170                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2171                 break;
2172 #endif
2173 
2174         default :
2175                 break;
2176         }
2177 
2178         return 0;
2179 }
2180 
2181 /*
2182  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2183  * are needed in Solaris kernel only. We don't need them in
2184  * ipftest to pretend the ICMP/RST packet was sent as a response.
2185  */
2186 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2187 /* ------------------------------------------------------------------------ */
2188 /* Function:    fr_make_rst                                                 */
2189 /* Returns:     int - 0 on success, -1 on failure                           */
2190 /* Parameters:  fin(I) - pointer to packet information                      */
2191 /*                                                                          */
2192 /* We must alter the original mblks passed to IPF from IP stack via         */
2193 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2194 /* IPF can basicaly do only these things with mblk representing the packet: */
2195 /*      leave it as it is (pass the packet)                                 */
2196 /*                                                                          */
2197 /*      discard it (block the packet)                                       */
2198 /*                                                                          */
2199 /*      alter it (i.e. NAT)                                                 */
2200 /*                                                                          */
2201 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2202 /* instead to IP stack via FW_HOOKS.                                        */
2203 /*                                                                          */
2204 /* The return-rst action for packets coming via NIC is handled as follows:  */
2205 /*      mblk with packet is discarded                                       */
2206 /*                                                                          */
2207 /*      new mblk with RST response is constructed and injected to network   */
2208 /*                                                                          */
2209 /* IPF can't inject packets to loopback interface, this is just another     */
2210 /* limitation we have to deal with here. The only option to send RST        */
2211 /* response to offending TCP packet coming via loopback is to alter it.     */
2212 /*                                                                          */
2213 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on      */
2214 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to     */
2215 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.                            */
2216 /* ------------------------------------------------------------------------ */
2217 int fr_make_rst(fin)
2218 fr_info_t *fin;
2219 {
2220         uint16_t tmp_port;
2221         int rv = -1;
2222         uint32_t old_ack;
2223         tcphdr_t *tcp = NULL;
2224         struct in_addr tmp_src;
2225 #ifdef USE_INET6
2226         struct in6_addr tmp_src6;
2227 #endif
2228         
2229         ASSERT(fin->fin_p == IPPROTO_TCP);
2230 
2231         /*
2232          * We do not need to adjust chksum, since it is not being checked by
2233          * Solaris IP stack for loopback clients.
2234          */
2235         if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2236             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2237 
2238                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2239                         /* Swap IPv4 addresses. */
2240                         tmp_src = fin->fin_ip->ip_src;
2241                         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2242                         fin->fin_ip->ip_dst = tmp_src;
2243 
2244                         rv = 0;
2245                 }
2246                 else
2247                         tcp = NULL;
2248         }
2249 #ifdef USE_INET6
2250         else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2251             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2252                 /*
2253                  * We are relying on fact the next header is TCP, which is true
2254                  * for regular TCP packets coming in over loopback.
2255                  */
2256                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2257                         /* Swap IPv6 addresses. */
2258                         tmp_src6 = fin->fin_ip6->ip6_src;
2259                         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2260                         fin->fin_ip6->ip6_dst = tmp_src6;
2261 
2262                         rv = 0;
2263                 }
2264                 else
2265                         tcp = NULL;
2266         }
2267 #endif
2268 
2269         if (tcp != NULL) {
2270                 /* 
2271                  * Adjust TCP header:
2272                  *      swap ports,
2273                  *      set flags,
2274                  *      set correct ACK number
2275                  */
2276                 tmp_port = tcp->th_sport;
2277                 tcp->th_sport = tcp->th_dport;
2278                 tcp->th_dport = tmp_port;
2279                 old_ack = tcp->th_ack;
2280                 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2281                 tcp->th_seq = old_ack;
2282                 tcp->th_flags = TH_RST | TH_ACK;
2283         }
2284 
2285         return (rv);
2286 }
2287 
2288 /* ------------------------------------------------------------------------ */
2289 /* Function:    fr_make_icmp_v4                                             */
2290 /* Returns:     int - 0 on success, -1 on failure                           */
2291 /* Parameters:  fin(I) - pointer to packet information                      */
2292 /*                                                                          */
2293 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2294 /* what is going to happen here and why. Once you read the comment there,   */
2295 /* continue here with next paragraph.                                       */
2296 /*                                                                          */
2297 /* To turn IPv4 packet into ICMPv4 response packet, these things must       */
2298 /* happen here:                                                             */
2299 /*      (1) Original mblk is copied (duplicated).                           */
2300 /*                                                                          */
2301 /*      (2) ICMP header is created.                                         */
2302 /*                                                                          */
2303 /*      (3) Link ICMP header with copy of original mblk, we have ICMPv4     */
2304 /*          data ready then.                                                */
2305 /*                                                                          */
2306 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2307 /*                                                                          */
2308 /*      (5) The mblk containing original packet is trimmed to contain IP    */
2309 /*          header only and ICMP chksum is computed.                        */
2310 /*                                                                          */
2311 /*      (6) The ICMP header we have from (3) is linked to original mblk,    */
2312 /*          which now contains new IP header. If original packet was spread */
2313 /*          over several mblks, only the first mblk is kept.                */
2314 /* ------------------------------------------------------------------------ */
2315 static int fr_make_icmp_v4(fin)
2316 fr_info_t *fin;
2317 {
2318         struct in_addr tmp_src;
2319         tcphdr_t *tcp;
2320         struct icmp *icmp;
2321         mblk_t *mblk_icmp;
2322         mblk_t *mblk_ip;
2323         size_t icmp_pld_len;    /* octets to append to ICMP header */
2324         size_t orig_iphdr_len;  /* length of IP header only */
2325         uint32_t sum;
2326         uint16_t *buf;
2327         int len;
2328 
2329 
2330         if (fin->fin_v != 4)
2331                 return (-1);
2332 
2333         /*
2334          * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2335          * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2336          */
2337         tcp = (tcphdr_t *) fin->fin_dp;
2338 
2339         if ((fin->fin_p == IPPROTO_TCP) && 
2340             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2341                 return (-1);
2342 
2343         /*
2344          * Step (1)
2345          *
2346          * Make copy of original mblk.
2347          *
2348          * We want to copy as much data as necessary, not less, not more.  The
2349          * ICMPv4 payload length for unreachable messages is:
2350          *      original IP header + 8 bytes of L4 (if there are any).
2351          *
2352          * We determine if there are at least 8 bytes of L4 data following IP
2353          * header first.
2354          */
2355         icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2356                 ICMPERR_ICMPHLEN : fin->fin_dlen;
2357         /*
2358          * Since we don't want to copy more data than necessary, we must trim
2359          * the original mblk here.  The right way (STREAMish) would be to use
2360          * adjmsg() to trim it.  However we would have to calculate the length
2361          * argument for adjmsg() from pointers we already have here.
2362          *
2363          * Since we have pointers and offsets, it's faster and easier for
2364          * us to just adjust pointers by hand instead of using adjmsg().
2365          */
2366         fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2367         fin->fin_m->b_wptr += icmp_pld_len;
2368         icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2369 
2370         /*
2371          * Also we don't want to copy any L2 stuff, which might precede IP
2372          * header, so we have have to set b_rptr to point to the start of IP
2373          * header.
2374          */
2375         fin->fin_m->b_rptr += fin->fin_ipoff;
2376         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2377                 return (-1);
2378         fin->fin_m->b_rptr -= fin->fin_ipoff;
2379 
2380         /*
2381          * Step (2)
2382          *
2383          * Create an ICMP header, which will be appened to original mblk later.
2384          * ICMP header is just another mblk.
2385          */
2386         mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2387         if (mblk_icmp == NULL) {
2388                 FREE_MB_T(mblk_ip);
2389                 return (-1);
2390         }
2391 
2392         MTYPE(mblk_icmp) = M_DATA;
2393         icmp = (struct icmp *) mblk_icmp->b_wptr;
2394         icmp->icmp_type = ICMP_UNREACH;
2395         icmp->icmp_code = fin->fin_icode & 0xFF;
2396         icmp->icmp_void = 0;
2397         icmp->icmp_cksum = 0;
2398         mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2399 
2400         /*
2401          * Step (3)
2402          *
2403          * Complete ICMP packet - link ICMP header with L4 data from original
2404          * IP packet.
2405          */
2406         linkb(mblk_icmp, mblk_ip);
2407 
2408         /*
2409          * Step (4)
2410          *
2411          * Swap IP addresses and change IP header fields accordingly in
2412          * original IP packet.
2413          *
2414          * There is a rule option return-icmp as a dest for physical
2415          * interfaces. This option becomes useless for loopback, since IPF box
2416          * uses same address as a loopback destination. We ignore the option
2417          * here, the ICMP packet will always look like as it would have been
2418          * sent from the original destination host.
2419          */
2420         tmp_src = fin->fin_ip->ip_src;
2421         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2422         fin->fin_ip->ip_dst = tmp_src;
2423         fin->fin_ip->ip_p = IPPROTO_ICMP;
2424         fin->fin_ip->ip_sum = 0;
2425 
2426         /*
2427          * Step (5)
2428          *
2429          * We trim the orignal mblk to hold IP header only.
2430          */
2431         fin->fin_m->b_wptr = fin->fin_dp;
2432         orig_iphdr_len = fin->fin_m->b_wptr -
2433                             (fin->fin_m->b_rptr + fin->fin_ipoff);
2434         fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2435                             orig_iphdr_len);
2436 
2437         /*
2438          * ICMP chksum calculation. The data we are calculating chksum for are
2439          * spread over two mblks, therefore we have to use two for loops.
2440          *
2441          * First for loop computes chksum part for ICMP header.
2442          */
2443         buf = (uint16_t *) icmp;
2444         len = ICMPERR_ICMPHLEN;
2445         for (sum = 0; len > 1; len -= 2)
2446                 sum += *buf++;
2447 
2448         /*
2449          * Here we add chksum part for ICMP payload.
2450          */
2451         len = icmp_pld_len;
2452         buf = (uint16_t *) mblk_ip->b_rptr;
2453         for (; len > 1; len -= 2)
2454                 sum += *buf++;
2455 
2456         /*
2457          * Chksum is done.
2458          */
2459         sum = (sum >> 16) + (sum & 0xffff);
2460         sum += (sum >> 16);
2461         icmp->icmp_cksum = ~sum; 
2462 
2463         /*
2464          * Step (6)
2465          *
2466          * Release all packet mblks, except the first one.
2467          */
2468         if (fin->fin_m->b_cont != NULL) {
2469                 FREE_MB_T(fin->fin_m->b_cont);
2470         }
2471 
2472         /*
2473          * Append ICMP payload to first mblk, which already contains new IP
2474          * header.
2475          */
2476         linkb(fin->fin_m, mblk_icmp);
2477 
2478         return (0);
2479 }
2480 
2481 #ifdef USE_INET6
2482 /* ------------------------------------------------------------------------ */
2483 /* Function:    fr_make_icmp_v6                                             */
2484 /* Returns:     int - 0 on success, -1 on failure                           */
2485 /* Parameters:  fin(I) - pointer to packet information                      */
2486 /*                                                                          */
2487 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2488 /* what and why is going to happen here. Once you read the comment there,   */
2489 /* continue here with next paragraph.                                       */
2490 /*                                                                          */
2491 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2492 /* The algorithm is fairly simple:                                          */
2493 /*      1) We need to get copy of complete mblk.                            */
2494 /*                                                                          */
2495 /*      2) New ICMPv6 header is created.                                    */
2496 /*                                                                          */
2497 /*      3) The copy of original mblk with packet is linked to ICMPv6        */
2498 /*         header.                                                          */
2499 /*                                                                          */
2500 /*      4) The checksum must be adjusted.                                   */
2501 /*                                                                          */
2502 /*      5) IP addresses in original mblk are swapped and IP header data     */
2503 /*         are adjusted (protocol number).                                  */
2504 /*                                                                          */
2505 /*      6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2506 /*         linked with the ICMPv6 data we got from (3).                     */
2507 /* ------------------------------------------------------------------------ */
2508 static int fr_make_icmp_v6(fin)
2509 fr_info_t *fin;
2510 {
2511         struct icmp6_hdr *icmp6;
2512         tcphdr_t *tcp;
2513         struct in6_addr tmp_src6;
2514         size_t icmp_pld_len;
2515         mblk_t *mblk_ip, *mblk_icmp;
2516 
2517         if (fin->fin_v != 6)
2518                 return (-1);
2519 
2520         /*
2521          * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2522          * IP stack. If it is not SYN/FIN, then we must drop it silently.
2523          */
2524         tcp = (tcphdr_t *) fin->fin_dp;
2525 
2526         if ((fin->fin_p == IPPROTO_TCP) && 
2527             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2528                 return (-1);
2529 
2530         /*
2531          * Step (1)
2532          *
2533          * We need to copy complete packet in case of IPv6, no trimming is
2534          * needed (except the L2 headers).
2535          */
2536         icmp_pld_len = M_LEN(fin->fin_m);
2537         fin->fin_m->b_rptr += fin->fin_ipoff;
2538         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2539                 return (-1);
2540         fin->fin_m->b_rptr -= fin->fin_ipoff;
2541 
2542         /*
2543          * Step (2)
2544          *
2545          * Allocate and create ICMP header.
2546          */
2547         mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2548                         BPRI_HI);
2549 
2550         if (mblk_icmp == NULL)
2551                 return (-1);
2552         
2553         MTYPE(mblk_icmp) = M_DATA;
2554         icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2555         icmp6->icmp6_type = ICMP6_DST_UNREACH;
2556         icmp6->icmp6_code = fin->fin_icode & 0xFF;
2557         icmp6->icmp6_data32[0] = 0;
2558         mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2559         
2560         /*
2561          * Step (3)
2562          *
2563          * Link the copy of IP packet to ICMP header.
2564          */
2565         linkb(mblk_icmp, mblk_ip);
2566 
2567         /* 
2568          * Step (4)
2569          *
2570          * Calculate chksum - this is much more easier task than in case of
2571          * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2572          * We are making compensation just for change of packet length.
2573          */
2574         icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2575 
2576         /*
2577          * Step (5)
2578          *
2579          * Swap IP addresses.
2580          */
2581         tmp_src6 = fin->fin_ip6->ip6_src;
2582         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2583         fin->fin_ip6->ip6_dst = tmp_src6;
2584 
2585         /*
2586          * and adjust IP header data.
2587          */
2588         fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2589         fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2590 
2591         /*
2592          * Step (6)
2593          *
2594          * We must release all linked mblks from original packet and keep only
2595          * the first mblk with IP header to link ICMP data.
2596          */
2597         fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2598 
2599         if (fin->fin_m->b_cont != NULL) {
2600                 FREE_MB_T(fin->fin_m->b_cont);
2601         }
2602 
2603         /*
2604          * Append ICMP payload to IP header.
2605          */
2606         linkb(fin->fin_m, mblk_icmp);
2607 
2608         return (0);
2609 }
2610 #endif  /* USE_INET6 */
2611 
2612 /* ------------------------------------------------------------------------ */
2613 /* Function:    fr_make_icmp                                                */
2614 /* Returns:     int - 0 on success, -1 on failure                           */
2615 /* Parameters:  fin(I) - pointer to packet information                      */
2616 /*                                                                          */
2617 /* We must alter the original mblks passed to IPF from IP stack via         */
2618 /* FW_HOOKS. The reasons why we must alter packet are discussed within      */
2619 /* comment at fr_make_rst() function.                                       */
2620 /*                                                                          */
2621 /* The fr_make_icmp() function acts as a wrapper, which passes the code     */
2622 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on         */
2623 /* protocol version. However there are some details, which are common to    */
2624 /* both IP versions. The details are going to be explained here.            */
2625 /*                                                                          */
2626 /* The packet looks as follows:                                             */
2627 /*    xxx | IP hdr | IP payload    ...  |                                   */
2628 /*    ^   ^        ^                    ^                                   */
2629 /*    |   |        |                    |                                   */
2630 /*    |   |        |            fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2631 /*    |   |        |                                                        */
/*    |   |        `- fin->fin_dp (in case of IPv4 points to L4 header)     */
2633 /*    |   |                                                                 */
2634 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2635 /*    |      of loopback)                                                   */
2636 /*    |                                                                     */
2637 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC            */
2638 /*                                                                          */
2639 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2640 /* well in advance before the matching rule was found (the rule, which took */
2641 /* us here, to fr_make_icmp() function).                                    */
2642 /*                                                                          */
2643 /* Both functions will turn packet passed in fin->fin_m mblk into a new          */
2644 /* packet. New packet will be represented as chain of mblks.                */
2645 /* orig mblk |- b_cont ---.                                                 */
2646 /*    ^                    `-> ICMP hdr |- b_cont--.                     */
2647 /*    |                           ^                 `-> duped orig mblk          */
2648 /*    |                           |                             ^           */
2649 /*    `- The original mblk        |                             |           */
2650 /*       will be trimmed to       |                             |           */
/*       contain IP header        |                             |           */
2652 /*       only                     |                             |           */
2653 /*                                |                             |           */
2654 /*                                `- This is newly              |           */
2655 /*                                   allocated mblk to          |           */
/*                                   hold ICMP header.          |           */
2657 /*                                                              |           */
2658 /*                                                              |           */
2659 /*                                                              |           */
2660 /*          This is the copy of original mblk, it will contain -'           */
/*          original IP packet in case of ICMPv6. In case of                */
2662 /*          ICMPv4 it will contain up to 8 bytes of IP payload              */
2663 /*          (TCP/UDP/L4) data from original packet.                         */
2664 /* ------------------------------------------------------------------------ */
2665 int fr_make_icmp(fin)
2666 fr_info_t *fin;
2667 {
2668         int rv;
2669         
2670         if (fin->fin_v == 4)
2671                 rv = fr_make_icmp_v4(fin);
2672 #ifdef USE_INET6
2673         else if (fin->fin_v == 6)
2674                 rv = fr_make_icmp_v6(fin);
2675 #endif
2676         else
2677                 rv = -1;
2678 
2679         return (rv);
2680 }
2681 
2682 /* ------------------------------------------------------------------------ */
2683 /* Function:    fr_buf_sum                                                  */
2684 /* Returns:     unsigned int - sum of buffer buf                            */
2685 /* Parameters:  buf - pointer to buf we want to sum up                      */
2686 /*              len - length of buffer buf                                  */
2687 /*                                                                          */
2688 /* Sums buffer buf. The result is used for chksum calculation. The buf      */
2689 /* argument must be aligned.                                                */
2690 /* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
        const uint16_t  *word = (const uint16_t *)buf;
        uint32_t        total = 0;
        unsigned int    remaining;

        /*
         * Accumulate all complete 16-bit words as they are stored in memory.
         */
        for (remaining = len; remaining >= 2; remaining -= 2)
                total += *word++;

        /*
         * A single octet is left over when len is odd; it is added as the
         * first octet of a 16-bit word whose second octet is zero.
         */
        if (remaining != 0)
                total += htons((*(const unsigned char *)word) << 8);

        return (total);
}
2708 
2709 /* ------------------------------------------------------------------------ */
2710 /* Function:    fr_calc_chksum                                              */
2711 /* Returns:     void                                                        */
2712 /* Parameters:  fin - pointer to fr_info_t instance with packet data        */
2713 /*              pkt - pointer to duplicated packet                          */
2714 /*                                                                          */
2715 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP        */
2716 /* versions.                                                                */
2717 /* ------------------------------------------------------------------------ */
2718 void fr_calc_chksum(fin, pkt)
2719 fr_info_t *fin;
2720 mb_t *pkt;
2721 {
2722         struct pseudo_hdr {
2723                 union {
2724                         struct in_addr  in4;
2725 #ifdef USE_INET6
2726                         struct in6_addr in6;
2727 #endif
2728                 } src_addr;
2729                 union {
2730                         struct in_addr  in4;
2731 #ifdef USE_INET6
2732                         struct in6_addr in6;
2733 #endif
2734                 } dst_addr;
2735                 char            zero;
2736                 char            proto;
2737                 uint16_t        len;
2738         }       phdr;
2739         uint32_t        sum, ip_sum;
2740         void    *buf;
2741         uint16_t        *l4_csum_p;
2742         tcphdr_t        *tcp;
2743         udphdr_t        *udp;
2744         icmphdr_t       *icmp;
2745 #ifdef USE_INET6
2746         struct icmp6_hdr        *icmp6;
2747 #endif
2748         ip_t            *ip;
2749         unsigned int    len;
2750         int             pld_len;
2751 
2752         /*
2753          * We need to pullup the packet to the single continuous buffer to avoid
2754          * potential misaligment of b_rptr member in mblk chain.
2755          */
2756         if (pullupmsg(pkt, -1) == 0) {
2757                 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2758                     " will not be computed by IPF");
2759                 return;
2760         }
2761 
2762         /*
2763          * It is guaranteed IP header starts right at b_rptr, because we are
2764          * working with a copy of the original packet.
2765          *
2766          * Compute pseudo header chksum for TCP and UDP.
2767          */
2768         if ((fin->fin_p == IPPROTO_UDP) ||
2769             (fin->fin_p == IPPROTO_TCP)) {
2770                 bzero(&phdr, sizeof (phdr));
2771 #ifdef USE_INET6
2772                 if (fin->fin_v == 6) {
2773                         phdr.src_addr.in6 = fin->fin_srcip6;
2774                         phdr.dst_addr.in6 = fin->fin_dstip6;
2775                 } else {
2776                         phdr.src_addr.in4 = fin->fin_src;
2777                         phdr.dst_addr.in4 = fin->fin_dst;
2778                 }
2779 #else
2780                 phdr.src_addr.in4 = fin->fin_src;
2781                 phdr.dst_addr.in4 = fin->fin_dst;
2782 #endif
2783                 phdr.zero = (char) 0;
2784                 phdr.proto = fin->fin_p;
2785                 phdr.len = htons((uint16_t)fin->fin_dlen);
2786                 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2787         } else {
2788                 sum = 0;
2789         }
2790 
2791         /*
2792          * Set pointer to the L4 chksum field in the packet, set buf pointer to
2793          * the L4 header start.
2794          */
2795         switch (fin->fin_p) {
2796                 case IPPROTO_UDP:
2797                         udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2798                         l4_csum_p = &udp->uh_sum;
2799                         buf = udp;
2800                         break;
2801                 case IPPROTO_TCP:
2802                         tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2803                         l4_csum_p = &tcp->th_sum;
2804                         buf = tcp;
2805                         break;
2806                 case IPPROTO_ICMP:
2807                         icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2808                         l4_csum_p = &icmp->icmp_cksum;
2809                         buf = icmp;
2810                         break;
2811 #ifdef USE_INET6
2812                 case IPPROTO_ICMPV6:
2813                         icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2814                         l4_csum_p = &icmp6->icmp6_cksum;
2815                         buf = icmp6;
2816                         break;
2817 #endif
2818                 default:
2819                         l4_csum_p = NULL;
2820         }
2821 
2822         /*
2823          * Compute L4 chksum if needed.
2824          */
2825         if (l4_csum_p != NULL) {
2826                 *l4_csum_p = (uint16_t)0;
2827                 pld_len = fin->fin_dlen;
2828                 len = pkt->b_wptr - (unsigned char *)buf;
2829                 ASSERT(len == pld_len);
2830                 /*
2831                  * Add payload sum to pseudoheader sum.
2832                  */
2833                 sum += fr_buf_sum(buf, len);
2834                 while (sum >> 16)
2835                         sum = (sum & 0xFFFF) + (sum >> 16);
2836 
2837                 *l4_csum_p = ~((uint16_t)sum);
2838                 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
2839         }
2840 
2841         /*
2842          * The IP header chksum is needed just for IPv4.
2843          */
2844         if (fin->fin_v == 4) {
2845                 /*
2846                  * Compute IPv4 header chksum.
2847                  */
2848                 ip = (ip_t *)pkt->b_rptr;
2849                 ip->ip_sum = (uint16_t)0;
2850                 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
2851                 while (ip_sum >> 16)
2852                         ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
2853 
2854                 ip->ip_sum = ~((uint16_t)ip_sum);
2855                 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
2856         }
2857 
2858         return;
2859 }
2860 
2861 #endif  /* _KERNEL && SOLARIS2 >= 10 */