1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /
  28 / Inline functions for i386 kernels.
  29 /       Shared between all x86 platform variants.
  30 /
  31 
  32 /
  33 / return current thread pointer
  34 /
  35 / NOTE: the "0x10" should be replaced by the computed value of the
  36 /       offset of "cpu_thread" from the beginning of the struct cpu.
  37 /       Including "assym.h" does not work, however, since that stuff
  38 /       is PSM-specific and is only visible to the 'unix' build anyway.
  39 /       Same with current cpu pointer, where "0xc" should be replaced
  40 /       by the computed value of the offset of "cpu_self".
  41 /       Ugh -- what a disaster.
  42 /
	.inline threadp,0
	movl	%gs:0x10, %eax	/ %eax = CPU->cpu_thread; 0x10 is the hand-computed
				/ offset of cpu_thread (see note above)
	.end
  46 
  47 /
  48 / return current cpu pointer
  49 /
	.inline curcpup,0
	movl	%gs:0xc, %eax	/ %eax = CPU->cpu_self; 0xc is the hand-computed
				/ offset of cpu_self (see note above)
	.end
  53 
  54 /
  55 / return caller
  56 /
	.inline caller,0
	movl	4(%ebp), %eax	/ %eax = return address saved just above the
				/ frame pointer (assumes caller keeps an %ebp frame)
	.end
  60 
  61 /
  62 / convert ipl to spl.  This is the identity function for i86
  63 /
	.inline ipltospl,0
	movl	(%esp), %eax	/ spl == ipl on x86: just return the argument
	.end
  67 
  68 /
  69 / find the low order bit in a word
  70 /
	.inline lowbit,4
	movl	$-1, %eax	/ preload -1 so a zero argument yields 0 after incl
	bsfl	(%esp), %eax	/ 0-based index of lowest set bit; relies on %eax
				/ being left untouched when the source is 0
				/ (de facto CPU behavior; arch manuals say undefined)
	incl	%eax		/ convert to 1-based result (0 means "no bit set")
	.end
  76 
  77 /
  78 / find the high order bit in a word
  79 /
	.inline highbit,4
	movl	$-1, %eax	/ preload -1 so a zero argument yields 0 after incl
	bsrl	(%esp), %eax	/ 0-based index of highest set bit; relies on %eax
				/ being left untouched when the source is 0
				/ (de facto CPU behavior; arch manuals say undefined)
	incl	%eax		/ convert to 1-based result (0 means "no bit set")
	.end
  85 
  86 /
  87 / Networking byte order functions (too bad, Intel has the wrong byte order)
  88 /
	.inline htonll,4
	movl	(%esp), %edx	/ low half of 64-bit arg -> high half of result
	movl	4(%esp), %eax	/ high half of 64-bit arg -> low half of result
	bswap	%edx		/ byte-swap each half; full 64-bit big-endian
	bswap	%eax		/ result is returned in %edx:%eax
	.end
  95 
	.inline ntohll,4
	movl	(%esp), %edx	/ identical to htonll: byte swapping is its own
	movl	4(%esp), %eax	/ inverse, so net-to-host == host-to-net
	bswap	%edx		/ swap halves and byte-swap each; 64-bit result
	bswap	%eax		/ in %edx:%eax
	.end
 102 
	.inline htonl,4
	movl	(%esp), %eax	/ load 32-bit argument
	bswap	%eax		/ reverse byte order -> big-endian result in %eax
	.end
 107 
	.inline ntohl,4
	movl	(%esp), %eax	/ identical to htonl: byte swap is its own inverse
	bswap	%eax
	.end
 112 
	.inline htons,4
	movl	(%esp), %eax	/ 16-bit value arrives in the low half of a word
	bswap	%eax		/ reversed bytes now sit in the high half
	shrl	$16, %eax	/ shift them down; swapped 16-bit result in %ax,
				/ upper bits of %eax zeroed
	.end
 118 
	.inline ntohs,4
	movl	(%esp), %eax	/ identical to htons: byte swap is its own inverse
	bswap	%eax
	shrl	$16, %eax
	.end
 124 
 125 /*
 126  * multiply two long numbers and yield a u_longlong_t result
 127  * Provided to manipulate hrtime_t values.
 128  */
	.inline mul32, 8
	movl	4(%esp), %eax	/ second 32-bit operand
	movl	(%esp), %ecx	/ first 32-bit operand
	mull	%ecx		/ unsigned 32x32 multiply; 64-bit product
				/ returned in %edx:%eax
	.end
 134 
 135 /*
 136  * Unlock hres_lock and increment the count value. (See clock.h)
 137  */
	.inline unlock_hres_lock, 0
	lock			/ atomic increment both drops the lock and
	incl	hres_lock	/ advances the count value (see clock.h)
	.end
 142 
/
/ atomically OR a byte value into *addr: (addr, value) on the stack
/
	.inline atomic_orb,8
	movl	(%esp), %eax	/ %eax = target address
	movl	4(%esp), %edx	/ %edx = value; only the low byte (%dl) is used
	lock
	orb	%dl,(%eax)	/ *addr |= value, atomically
	.end
 149 
/
/ atomically AND a byte value into *addr: (addr, value) on the stack
/
	.inline atomic_andb,8
	movl	(%esp), %eax	/ %eax = target address
	movl	4(%esp), %edx	/ %edx = value; only the low byte (%dl) is used
	lock
	andb	%dl,(%eax)	/ *addr &= value, atomically
	.end
 156 
 157 /*
 158  * atomic inc/dec operations.
 159  *      void atomic_inc16(uint16_t *addr) { ++*addr; }
 160  *      void atomic_dec16(uint16_t *addr) { --*addr; }
 161  */
	.inline atomic_inc16,4
	movl	(%esp), %eax	/ %eax = addr
	lock
	incw	(%eax)		/ 16-bit atomic ++*addr
	.end
 167 
	.inline atomic_dec16,4
	movl	(%esp), %eax	/ %eax = addr
	lock
	decw	(%eax)		/ 16-bit atomic --*addr
	.end
 173 
 174 /*
 175  * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
 176  * a hint that the code sequence is a busy spin-wait loop.  Without a pause
 177  * instruction in these loops, the P4 Xeon processor may suffer a severe
 178  * penalty when exiting the loop because the processor detects a possible
 179  * memory violation.  Inserting the pause instruction significantly reduces
 180  * the likelihood of a memory order violation, improving performance.
 181  * The pause instruction is a NOP on all other IA-32 processors.
 182  */
	.inline ht_pause, 0
	rep			/ our compiler doesn't support "pause" yet, so
	nop			/ emit its encoding ("F3 90" == rep nop) directly;
				/ plain NOP on CPUs without pause support
	.end
 187 
 188 /*
 189  * prefetch 64 bytes
 190  *
 191  * prefetch is an SSE extension which is not supported on older 32-bit processors
 192  * so define this as a no-op for now
 193  */
 194 
	.inline prefetch_read_many,4
/ intentionally empty (no-op): prefetcht0 is an SSE instruction not present
/ on older 32-bit CPUs — see block comment above
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
 200 
	.inline prefetch_read_once,4
/ intentionally empty (no-op): prefetchnta is an SSE instruction not present
/ on older 32-bit CPUs — see block comment above
/	movl		(%esp), %eax
/	prefetchnta	(%eax)
/	prefetchnta	32(%eax)
	.end
 206 
	.inline prefetch_write_many,4
/ intentionally empty (no-op): prefetcht0 is an SSE instruction not present
/ on older 32-bit CPUs — see block comment above
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
 212 
	.inline prefetch_write_once,4
/ intentionally empty (no-op): prefetcht0 is an SSE instruction not present
/ on older 32-bit CPUs — see block comment above
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
 218