Print this page
patch nuke-the-dbuf-hash
patch make-the-merge-easy


  67         dmu_buf_impl_t *db = vdb;
  68         bzero(db, sizeof (dmu_buf_impl_t));
  69 
  70         mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
  71         cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
  72         refcount_create(&db->db_holds);
  73 
  74         return (0);
  75 }
  76 
     /*
      * Destructor callback passed to kmem_cache_create() for the dbuf cache.
      * Destroys the mutex, condition variable and hold refcount embedded in
      * the dmu_buf_impl_t at vdb; the second argument is not used.
      */
  77 /* ARGSUSED */
  78 static void
  79 dbuf_dest(void *vdb, void *unused)
  80 {
  81         dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)vdb;

  82         mutex_destroy(&dbuf->db_mtx);
  83         cv_destroy(&dbuf->db_changed);
  84         refcount_destroy(&dbuf->db_holds);
  85 }
  86 
  87 /*
  88  * dbuf hash table routines
      *
      * dbuf_hash_table is the single file-scope table used by dbuf_find(),
      * dbuf_hash_insert() and dbuf_hash_remove(); the pragma below requests
      * 64-byte alignment for it.
  89  */
  90 #pragma align 64(dbuf_hash_table)
  91 static dbuf_hash_table_t dbuf_hash_table;
  92 
     /*
      * Number of dbufs currently linked into dbuf_hash_table: incremented by
      * dbuf_hash_insert() and decremented by dbuf_hash_remove().
      */
  93 static uint64_t dbuf_hash_count;
  94 
     /*
      * Fold the low byte of v into crc with one table-driven step over
      * zfs_crc64_table.
      */
     static uint64_t
     dbuf_hash_step(uint64_t crc, uint64_t v)
     {
             return ((crc >> 8) ^ zfs_crc64_table[(crc ^ v) & 0xFF]);
     }

     /*
      * Hash the identity of a dbuf (objset pointer, object number, level and
      * block id) into a 64-bit value.  Callers mask the result to pick a
      * hash bucket.
      */
  95 static uint64_t
  96 dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
  97 {
  98         uintptr_t osaddr = (uintptr_t)os;
  99         uint64_t hash = -1ULL;
 100 
 101         ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
 102         hash = dbuf_hash_step(hash, lvl);
 103         hash = dbuf_hash_step(hash, osaddr >> 6);
 104         hash = dbuf_hash_step(hash, obj);
 105         hash = dbuf_hash_step(hash, obj >> 8);
 106         hash = dbuf_hash_step(hash, blkid);
 107         hash = dbuf_hash_step(hash, blkid >> 8);
 108 
             /* Mix in the higher bits that the byte-wise steps did not see. */
 109         hash ^= (osaddr >> 14) ^ (obj >> 16) ^ (blkid >> 16);
 110 
 111         return (hash);
 112 }
 113 
     /*
      * Convenience wrapper around dbuf_hash().  It expands to a plain
      * expression with no trailing semicolon, so it can be used anywhere an
      * expression is allowed; the caller supplies the statement terminator.
      */
 114 #define DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid)
 115 
     /*
      * True when dbuf has the given identity: the same object number, objset,
      * level and block id.  Each macro argument is evaluated at most once, in
      * the order object, objset, level, blkid, and later ones are skipped as
      * soon as one comparison fails.
      */
 116 #define DBUF_EQUAL(dbuf, os, obj, level, blkid)         \
 117         ((dbuf)->db.db_object == (obj) &&            \
 118         (dbuf)->db_objset == (os) &&                 \
 119         (dbuf)->db_level == (level) &&                       \
 120         (dbuf)->db_blkid == (blkid))
 121 
     /*
      * Look up the dbuf for block (level, blkid) of dnode dn in the global
      * dbuf hash table, keyed by the dnode's objset and object number.
      *
      * Returns the first matching dbuf that is not in the DB_EVICTING state,
      * with that dbuf's db_mtx still held (the hash bucket mutex is dropped
      * before returning), or NULL if there is no such dbuf.
      */
 122 dmu_buf_impl_t *
 123 dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
 124 {
 125         dbuf_hash_table_t *h = &dbuf_hash_table;
 126         objset_t *os = dn->dn_objset;
 127         uint64_t obj = dn->dn_object;
 128         uint64_t hv = DBUF_HASH(os, obj, level, blkid);
 129         uint64_t idx = hv & h->hash_table_mask;
 130         dmu_buf_impl_t *db;















 131 
             /* Walk the bucket's chain under the bucket mutex. */
 132         mutex_enter(DBUF_HASH_MUTEX(h, idx));
 133         for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
 134                 if (DBUF_EQUAL(db, os, obj, level, blkid)) {
                             /* db_state is examined with db_mtx held. */
 135                         mutex_enter(&db->db_mtx);
 136                         if (db->db_state != DB_EVICTING) {
 137                                 mutex_exit(DBUF_HASH_MUTEX(h, idx));
 138                                 return (db);
 139                         }
                             /* Being evicted: skip it and keep searching. */
 140                         mutex_exit(&db->db_mtx);
 141                 }
 142         }
 143         mutex_exit(DBUF_HASH_MUTEX(h, idx));
 144         return (NULL);
 145 }
 146 
 147 /*
 148  * Insert an entry into the hash table.  If there is already an element
 149  * equal to elem in the hash table, then the already existing element
 150  * will be returned and the new element will not be inserted.
 151  * Otherwise returns NULL.
      *
      * An existing element only counts if it is not in the DB_EVICTING state.
      * In both cases a db_mtx is held on return: that of the existing element
      * when one is returned, otherwise that of the newly inserted db.
 152  */
 153 static dmu_buf_impl_t *
 154 dbuf_hash_insert(dmu_buf_impl_t *db)
 155 {
 156         dbuf_hash_table_t *h = &dbuf_hash_table;
 157         objset_t *os = db->db_objset;
 158         uint64_t obj = db->db.db_object;
 159         int level = db->db_level;
 160         uint64_t blkid = db->db_blkid;
 161         uint64_t hv = DBUF_HASH(os, obj, level, blkid);
 162         uint64_t idx = hv & h->hash_table_mask;
 163         dmu_buf_impl_t *dbf;
 164 
             /* Look for a live duplicate under the bucket mutex. */
 165         mutex_enter(DBUF_HASH_MUTEX(h, idx));
 166         for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) {
 167                 if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
 168                         mutex_enter(&dbf->db_mtx);
 169                         if (dbf->db_state != DB_EVICTING) {
 170                                 mutex_exit(DBUF_HASH_MUTEX(h, idx));
 171                                 return (dbf);
 172                         }
 173                         mutex_exit(&dbf->db_mtx);
 174                 }
 175         }
 176 
             /*
              * No live duplicate: link db at the head of the chain.  db_mtx is
              * taken here and deliberately left held for the caller.
              */
 177         mutex_enter(&db->db_mtx);
 178         db->db_hash_next = h->hash_table[idx];
 179         h->hash_table[idx] = db;
 180         mutex_exit(DBUF_HASH_MUTEX(h, idx));
 181         atomic_inc_64(&dbuf_hash_count);
 182 

 183         return (NULL);
 184 }
 185 
 186 /*
 187  * Remove an entry from the hash table.  It must be in the EVICTING state.
      *
      * The caller must not hold db_mtx, and db must have no holds.  db is
      * expected to be on its bucket's chain; debug builds assert this while
      * walking the chain.
 188  */
 189 static void
 190 dbuf_hash_remove(dmu_buf_impl_t *db)
 191 {
 192         dbuf_hash_table_t *h = &dbuf_hash_table;
 193         uint64_t hv = DBUF_HASH(db->db_objset, db->db.db_object,
 194             db->db_level, db->db_blkid);
 195         uint64_t idx = hv & h->hash_table_mask;
 196         dmu_buf_impl_t *dbf, **dbp;
 197 
 198         /*
 199          * We mustn't hold db_mtx to maintain lock ordering:
 200          * DBUF_HASH_MUTEX > db_mtx.
 201          */
 202         ASSERT(refcount_is_zero(&db->db_holds));
 203         ASSERT(db->db_state == DB_EVICTING);
 204         ASSERT(!MUTEX_HELD(&db->db_mtx));
 205 
 206         mutex_enter(DBUF_HASH_MUTEX(h, idx));
             /* Find the link that points at db so it can be spliced out. */
 207         dbp = &h->hash_table[idx];
 208         while ((dbf = *dbp) != db) {
                     /* Check for end-of-chain before using dbf. */
 210                 ASSERT(dbf != NULL);
 209                 dbp = &dbf->db_hash_next;
 211         }
 212         *dbp = db->db_hash_next;
 213         db->db_hash_next = NULL;
 214         mutex_exit(DBUF_HASH_MUTEX(h, idx));
 215         atomic_dec_64(&dbuf_hash_count);
 216 }
 217 
 218 static arc_evict_func_t dbuf_do_evict;
 219 
     /*
      * Run the user eviction callback registered on a level-0 dbuf, if any,
      * then clear the user pointer, user data pointer and callback so that
      * it is not run again.  The caller must hold db_mtx.
      */
 220 static void
 221 dbuf_evict_user(dmu_buf_impl_t *db)
 222 {
 223         ASSERT(MUTEX_HELD(&db->db_mtx));
 224 
 225         if (db->db_level == 0 && db->db_evict_func != NULL) {
                     /* Refresh the user's data pointer before calling back. */
 228                 if (db->db_user_data_ptr_ptr != NULL)
 229                         *db->db_user_data_ptr_ptr = db->db.db_data;
 230                 db->db_evict_func(&db->db, db->db_user_ptr);
 231                 db->db_user_ptr = NULL;
 232                 db->db_user_data_ptr_ptr = NULL;
 233                 db->db_evict_func = NULL;
             }
 234 }
 235 
 236 boolean_t
 237 dbuf_is_metadata(dmu_buf_impl_t *db)


 246                 DB_DNODE_EXIT(db);
 247 
 248                 return (is_metadata);
 249         }
 250 }
 251 
     /*
      * Evict a dbuf: clear it with dbuf_clear() and then free it with
      * dbuf_destroy().  The caller must hold db_mtx, and both db_buf and
      * db_data_pending must already be NULL.
      */
 252 void
 253 dbuf_evict(dmu_buf_impl_t *db)
 254 {
 255         ASSERT(MUTEX_HELD(&db->db_mtx));
 256         ASSERT(db->db_buf == NULL);
 257         ASSERT(db->db_data_pending == NULL);
 258 
 259         dbuf_clear(db);
 260         dbuf_destroy(db);
 261 }
 262 
     /*
      * One-time setup of the dbuf layer: allocates the global dbuf hash
      * table, creates the kmem cache that dmu_buf_impl_t objects come from,
      * and initializes the hash table's mutexes.
      */
 263 void
 264 dbuf_init(void)
 265 {
 267         dbuf_hash_table_t *h = &dbuf_hash_table;
 266         uint64_t hsize;
 268         int i;
 269 
 270         /*
 271          * Start at 64K buckets and keep doubling until there is at least
 272          * one bucket per 4K of physical memory.  The table will take up
 273          * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers).
 274          */
             hsize = 1ULL << 16;
 275         while (hsize * 4096 < physmem * PAGESIZE)
 276                 hsize *= 2;
 277 
             /*
              * The table is allocated with KM_NOSLEEP; each time that fails,
              * try again with a table half the size.
              */
 278         for (;;) {
 279                 h->hash_table_mask = hsize - 1;
 280                 h->hash_table = kmem_zalloc(hsize * sizeof (void *),
                         KM_NOSLEEP);
 281                 if (h->hash_table != NULL)
                             break;
 282                 /* XXX - we should really return an error instead of assert */
 283                 ASSERT(hsize > (1ULL << 10));
 284                 hsize >>= 1;
 286         }
 287 
 288         dbuf_cache = kmem_cache_create("dmu_buf_impl_t",
 289             sizeof (dmu_buf_impl_t), 0,
 290             dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
 291 
 292         for (i = 0; i < DBUF_MUTEXES; i++)
 293                 mutex_init(DBUF_HASH_MUTEX(h, i), NULL, MUTEX_DEFAULT, NULL);
 294 }
 295 
     /*
      * Undo dbuf_init(): destroy the hash table mutexes, free the hash table
      * array and destroy the dbuf kmem cache.
      */
 296 void
 297 dbuf_fini(void)
 298 {
 299         dbuf_hash_table_t *h = &dbuf_hash_table;
 300         int m = 0;
 301 
 302         while (m < DBUF_MUTEXES) {
 303                 mutex_destroy(DBUF_HASH_MUTEX(h, m));
                     m++;
             }
             /* The table was allocated with hash_table_mask + 1 pointer slots. */
 304         kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
 305         kmem_cache_destroy(dbuf_cache);
 306 }
 307 
 308 /*
 309  * Other stuff.
 310  */
 311 
 312 #ifdef ZFS_DEBUG
 313 static void
 314 dbuf_verify(dmu_buf_impl_t *db)
 315 {
 316         dnode_t *dn;
 317         dbuf_dirty_record_t *dr;
 318 
 319         ASSERT(MUTEX_HELD(&db->db_mtx));
 320 
 321         if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY))
 322                 return;
 323 
 324         ASSERT(db->db_objset != NULL);


1723         } else {
1724                 /* the block is referenced from the dnode */
1725                 ASSERT3U(level, ==, nlevels-1);
1726                 ASSERT(dn->dn_phys->dn_nblkptr == 0 ||
1727                     blkid < dn->dn_phys->dn_nblkptr);
1728                 if (dn->dn_dbuf) {
1729                         dbuf_add_ref(dn->dn_dbuf, NULL);
1730                         *parentp = dn->dn_dbuf;
1731                 }
1732                 *bpp = &dn->dn_phys->dn_blkptr[blkid];
1733                 return (0);
1734         }
1735 }
1736 
     /*
      * Allocate and initialize a dbuf for block (level, blkid) of dnode dn,
      * with the given parent dbuf and block pointer.
      *
      * A bonus dbuf (DMU_BONUS_BLKID) is returned right after initialization;
      * it is not placed in the hash table and db_mtx is not taken for it here.
      * Any other dbuf is inserted into the dbuf hash table and the dnode's
      * dn_dbufs list, and is returned with its db_mtx held (taken inside
      * dbuf_hash_insert()).  If an equal dbuf is already hashed, the new one
      * is freed and the existing one is returned instead, also with its
      * db_mtx held.
      *
      * The caller must hold dn_struct_rwlock.
      */
1737 static dmu_buf_impl_t *
1738 dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
1739     dmu_buf_impl_t *parent, blkptr_t *blkptr)
1740 {
1741         objset_t *os = dn->dn_objset;
1742         dmu_buf_impl_t *db, *odb;

1743 
1744         ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1745         ASSERT(dn->dn_type != DMU_OT_NONE);
1746 
1747         db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
1748 
1749         db->db_objset = os;
1750         db->db.db_object = dn->dn_object;
1751         db->db_level = level;
1752         db->db_blkid = blkid;
1753         db->db_last_dirty = NULL;
1754         db->db_dirtycnt = 0;
1755         db->db_dnode_handle = dn->dn_handle;
1756         db->db_parent = parent;
1757         db->db_blkptr = blkptr;
1758 
1759         db->db_user_ptr = NULL;
1760         db->db_user_data_ptr_ptr = NULL;
1761         db->db_evict_func = NULL;
1762         db->db_immediate_evict = 0;
1763         db->db_freed_in_flight = 0;
1764 
             /* Size and offset depend on which kind of block this is. */
1765         if (blkid == DMU_BONUS_BLKID) {
1766                 ASSERT3P(parent, ==, dn->dn_dbuf);
1767                 db->db.db_size = DN_MAX_BONUSLEN -
1768                     (dn->dn_nblkptr-1) * sizeof (blkptr_t);
1769                 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
1770                 db->db.db_offset = DMU_BONUS_BLKID;
1771                 db->db_state = DB_UNCACHED;
1772                 /* the bonus dbuf is not placed in the hash table */
1773                 arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1774                 return (db);
1775         } else if (blkid == DMU_SPILL_BLKID) {
1776                 db->db.db_size = (blkptr != NULL) ?
1777                     BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE;
1778                 db->db.db_offset = 0;
1779         } else {
                     /* Indirect blocks use the indirect block size. */
1780                 int blocksize =
1781                     db->db_level ? 1 << dn->dn_indblkshift : dn->dn_datablksz;
1782                 db->db.db_size = blocksize;
1783                 db->db.db_offset = db->db_blkid * blocksize;
1784         }
1785 
1786         /*
1787          * Hold the dn_dbufs_mtx while we get the new dbuf
1788          * in the hash table *and* added to the dbufs list.
1789          * This prevents a possible deadlock with someone
1790          * trying to look up this dbuf before its added to the
1791          * dn_dbufs list.
1792          */
1793         mutex_enter(&dn->dn_dbufs_mtx);

             /*
              * The dbuf is DB_EVICTING while it is hashed but not yet on
              * dn_dbufs; hash lookups skip entries in that state.
              */
1794         db->db_state = DB_EVICTING;
1795         if ((odb = dbuf_hash_insert(db)) != NULL) {
1796                 /* someone else inserted it first */

1797                 kmem_cache_free(dbuf_cache, db);

1798                 mutex_exit(&dn->dn_dbufs_mtx);
1799                 return (odb);
1800         }
1801         avl_add(&dn->dn_dbufs, db);
1802         if (db->db_level == 0 && db->db_blkid >=
1803             dn->dn_unlisted_l0_blkid)
1804                 dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
1805         db->db_state = DB_UNCACHED;
1806         mutex_exit(&dn->dn_dbufs_mtx);
1807         arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1808 
             /* A parent other than the dnode's own dbuf gets a hold from db. */
1809         if (parent && parent != dn->dn_dbuf)
1810                 dbuf_add_ref(parent, db);
1811 
1812         ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
1813             refcount_count(&dn->dn_holds) > 0);
             /* Every dbuf on the dnode accounts for one dnode hold. */
1814         (void) refcount_add(&dn->dn_holds, db);
1815         atomic_inc_32(&dn->dn_dbufs_count);
1816 
1817         dprintf_dbuf(db, "db=%p\n", db);
1818 
1819         return (db);
1820 }
1821 


1853                  */
1854                 if (db->db_dnode_handle != NULL) {
1855                         dnode_t *dn;
1856 
1857                         DB_DNODE_ENTER(db);
1858                         dn = DB_DNODE(db);
1859                         mutex_enter(&dn->dn_dbufs_mtx);
1860                         avl_remove(&dn->dn_dbufs, db);
1861                         atomic_dec_32(&dn->dn_dbufs_count);
1862                         mutex_exit(&dn->dn_dbufs_mtx);
1863                         DB_DNODE_EXIT(db);
1864                         /*
1865                          * Decrementing the dbuf count means that the hold
1866                          * corresponding to the removed dbuf is no longer
1867                          * discounted in dnode_move(), so the dnode cannot be
1868                          * moved until after we release the hold.
1869                          */
1870                         dnode_rele(dn, db);
1871                         db->db_dnode_handle = NULL;
1872                 }
1873                 dbuf_hash_remove(db);
1874         }
1875         db->db_parent = NULL;
1876         db->db_buf = NULL;
1877 
1878         ASSERT(db->db.db_data == NULL);
1879         ASSERT(db->db_hash_next == NULL);
1880         ASSERT(db->db_blkptr == NULL);
1881         ASSERT(db->db_data_pending == NULL);
1882 
1883         kmem_cache_free(dbuf_cache, db);
1884         arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1885 }
1886 
1887 void
1888 dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
1889 {
1890         dmu_buf_impl_t *db = NULL;
1891         blkptr_t *bp = NULL;
1892 
1893         ASSERT(blkid != DMU_BONUS_BLKID);
1894         ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1895 
1896         if (dnode_block_freed(dn, blkid))
1897                 return;
1898 
1899         /* dbuf_find() returns with db_mtx held */




  67         dmu_buf_impl_t *db = vdb;
  68         bzero(db, sizeof (dmu_buf_impl_t));
  69 
  70         mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
  71         cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
  72         refcount_create(&db->db_holds);
  73 
  74         return (0);
  75 }
  76 
     /*
      * Destructor callback passed to kmem_cache_create() for the dbuf cache.
      * Destroys the mutex, condition variable and hold refcount embedded in
      * the dmu_buf_impl_t at vdb; the second argument is not used.
      */
  77 /* ARGSUSED */
  78 static void
  79 dbuf_dest(void *vdb, void *unused)
  80 {
  81         dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)vdb;

  82         mutex_destroy(&dbuf->db_mtx);
  83         cv_destroy(&dbuf->db_changed);
  84         refcount_destroy(&dbuf->db_holds);
  85 }
  86 
     /*
      * Look up the dbuf for block (level, blkid) of dnode dn in the dnode's
      * dn_dbufs AVL tree.
      *
      * Returns the first matching dbuf that is not in the DB_EVICTING state,
      * with that dbuf's db_mtx held (dn_dbufs_mtx is dropped before
      * returning), or NULL if there is no such dbuf.
      */
  87 dmu_buf_impl_t *
  88 dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
  89 {
  92         dmu_buf_impl_t *db;
  93         dmu_buf_impl_t key;
  94         avl_index_t where;
  95 
             /*
              * Only the three fields set here are initialized in the search
              * key.  NOTE(review): this presumes the dn_dbufs comparator reads
              * nothing else and sorts a DB_SEARCH key ahead of every real dbuf
              * with the same level and blkid - confirm against the comparator.
              */
  96         key.db_level = level;
  97         key.db_blkid = blkid;
  98         key.db_state = DB_SEARCH;
  99 
 100         mutex_enter(&dn->dn_dbufs_mtx);
             /*
              * The key itself is never expected to be in the tree; the lookup
              * is only used to obtain the insertion point "where".
              */
 101         db = avl_find(&dn->dn_dbufs, &key, &where);
 102         ASSERT3P(db, ==, NULL);
 103         db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
 104 
             /* Scan forward over the entries with this level and blkid. */
 105         for (; db != NULL; db = AVL_NEXT(&dn->dn_dbufs, db)) {
 106                 if ((db->db_level != level) || (db->db_blkid != blkid))
 107                         break;
 108 
                     /* db_state is examined with db_mtx held. */
 109                 mutex_enter(&db->db_mtx);
 110                 if (db->db_state != DB_EVICTING) {
 111                         mutex_exit(&dn->dn_dbufs_mtx);
 112                         return (db);
 113                 }
                     /* Being evicted: skip it and keep searching. */
 114                 mutex_exit(&db->db_mtx);
 115         }
 116 
 117         mutex_exit(&dn->dn_dbufs_mtx);
 118         return (NULL);
 119 }
 120 
































 121 static arc_evict_func_t dbuf_do_evict;
 122 
     /*
      * Run the user eviction callback registered on a level-0 dbuf, if any,
      * then clear the user pointer, user data pointer and callback so that
      * it is not run again.  The caller must hold db_mtx.
      */
 123 static void
 124 dbuf_evict_user(dmu_buf_impl_t *db)
 125 {
 126         ASSERT(MUTEX_HELD(&db->db_mtx));
 127 
 128         if (db->db_level == 0 && db->db_evict_func != NULL) {
                     /* Refresh the user's data pointer before calling back. */
 131                 if (db->db_user_data_ptr_ptr != NULL)
 132                         *db->db_user_data_ptr_ptr = db->db.db_data;
 133                 db->db_evict_func(&db->db, db->db_user_ptr);
 134                 db->db_user_ptr = NULL;
 135                 db->db_user_data_ptr_ptr = NULL;
 136                 db->db_evict_func = NULL;
             }
 137 }
 138 
 139 boolean_t
 140 dbuf_is_metadata(dmu_buf_impl_t *db)


 149                 DB_DNODE_EXIT(db);
 150 
 151                 return (is_metadata);
 152         }
 153 }
 154 
     /*
      * Evict a dbuf: clear it with dbuf_clear() and then free it with
      * dbuf_destroy().  The caller must hold db_mtx, and both db_buf and
      * db_data_pending must already be NULL.
      */
 155 void
 156 dbuf_evict(dmu_buf_impl_t *db)
 157 {
 158         ASSERT(MUTEX_HELD(&db->db_mtx));
 159         ASSERT(db->db_buf == NULL);
 160         ASSERT(db->db_data_pending == NULL);
 161 
 162         dbuf_clear(db);
 163         dbuf_destroy(db);
 164 }
 165 
     /*
      * One-time setup of the dbuf layer: creates the kmem cache that
      * dmu_buf_impl_t objects are allocated from, with dbuf_cons and
      * dbuf_dest as its constructor and destructor callbacks.
      */
 166 void
 167 dbuf_init(void)
 168 {






















 169         dbuf_cache = kmem_cache_create("dmu_buf_impl_t",
 170             sizeof (dmu_buf_impl_t),
 171             0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);



 172 }
 173 
     /*
      * Undo dbuf_init(): destroy the dbuf kmem cache.
      */
 174 void
 175 dbuf_fini(void)
 176 {






 177         kmem_cache_destroy(dbuf_cache);
 178 }
 179 
 180 /*
 181  * Other stuff.
 182  */
 183 
 184 #ifdef ZFS_DEBUG
 185 static void
 186 dbuf_verify(dmu_buf_impl_t *db)
 187 {
 188         dnode_t *dn;
 189         dbuf_dirty_record_t *dr;
 190 
 191         ASSERT(MUTEX_HELD(&db->db_mtx));
 192 
 193         if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY))
 194                 return;
 195 
 196         ASSERT(db->db_objset != NULL);


1595         } else {
1596                 /* the block is referenced from the dnode */
1597                 ASSERT3U(level, ==, nlevels-1);
1598                 ASSERT(dn->dn_phys->dn_nblkptr == 0 ||
1599                     blkid < dn->dn_phys->dn_nblkptr);
1600                 if (dn->dn_dbuf) {
1601                         dbuf_add_ref(dn->dn_dbuf, NULL);
1602                         *parentp = dn->dn_dbuf;
1603                 }
1604                 *bpp = &dn->dn_phys->dn_blkptr[blkid];
1605                 return (0);
1606         }
1607 }
1608 
     /*
      * Allocate and initialize a dbuf for block (level, blkid) of dnode dn,
      * with the given parent dbuf and block pointer.
      *
      * A bonus dbuf (DMU_BONUS_BLKID) is returned right after initialization;
      * it is not placed in the dnode's dbuf tree and db_mtx is not taken for
      * it here.  Any other dbuf is inserted into the dnode's dn_dbufs AVL
      * tree and returned with its db_mtx held.  If the tree lookup reports an
      * existing equal dbuf, the new one is freed and the existing one is
      * returned instead, also with its db_mtx held.
      *
      * The caller must hold dn_struct_rwlock.
      */
1609 static dmu_buf_impl_t *
1610 dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
1611     dmu_buf_impl_t *parent, blkptr_t *blkptr)
1612 {
1613         objset_t *os = dn->dn_objset;
1614         dmu_buf_impl_t *db, *odb;
1615         avl_index_t where;
1616 
1617         ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1618         ASSERT(dn->dn_type != DMU_OT_NONE);
1619 
1620         db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
1621 
1622         db->db_objset = os;
1623         db->db.db_object = dn->dn_object;
1624         db->db_level = level;
1625         db->db_blkid = blkid;
1626         db->db_last_dirty = NULL;
1627         db->db_dirtycnt = 0;
1628         db->db_dnode_handle = dn->dn_handle;
1629         db->db_parent = parent;
1630         db->db_blkptr = blkptr;
1631 
1632         db->db_user_ptr = NULL;
1633         db->db_user_data_ptr_ptr = NULL;
1634         db->db_evict_func = NULL;
1635         db->db_immediate_evict = 0;
1636         db->db_freed_in_flight = 0;
1637 
             /* Size and offset depend on which kind of block this is. */
1638         if (blkid == DMU_BONUS_BLKID) {
1639                 ASSERT3P(parent, ==, dn->dn_dbuf);
1640                 db->db.db_size = DN_MAX_BONUSLEN -
1641                     (dn->dn_nblkptr-1) * sizeof (blkptr_t);
1642                 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
1643                 db->db.db_offset = DMU_BONUS_BLKID;
1644                 db->db_state = DB_UNCACHED;
1645                 /* the bonus dbuf is not placed into the dnode's dbuf tree */
1646                 arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1647                 return (db);
1648         } else if (blkid == DMU_SPILL_BLKID) {
1649                 db->db.db_size = (blkptr != NULL) ?
1650                     BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE;
1651                 db->db.db_offset = 0;
1652         } else {
                     /* Indirect blocks use the indirect block size. */
1653                 int blocksize =
1654                     db->db_level ? 1 << dn->dn_indblkshift : dn->dn_datablksz;
1655                 db->db.db_size = blocksize;
1656                 db->db.db_offset = db->db_blkid * blocksize;
1657         }
1658 
             /*
              * Hold dn_dbufs_mtx across the duplicate check and the insertion
              * so that the two are atomic with respect to other users of the
              * dn_dbufs tree.  db_mtx is taken here and stays held when the
              * new dbuf is returned.
              *
              * NOTE(review): avl_find() only reports a node for which the
              * tree's comparator returns 0.  If that comparator breaks ties
              * between dbufs with equal level and blkid (dbuf_find() scans
              * several such neighbours), this lookup cannot detect an
              * existing dbuf for the same block - confirm against the
              * comparator.  Unlike dbuf_find(), the existing dbuf's state is
              * not checked for DB_EVICTING here - confirm that is intended.
              *
              * NOTE(review): the temporary DB_EVICTING state is only visible
              * to threads that do not take dn_dbufs_mtx - confirm it is still
              * needed.
              */
1659         mutex_enter(&dn->dn_dbufs_mtx);
1660         mutex_enter(&db->db_mtx);
1661         db->db_state = DB_EVICTING;
1662         if ((odb = avl_find(&dn->dn_dbufs, db, &where)) != NULL) {
1663                 /* someone else inserted it first */
1664                 mutex_exit(&db->db_mtx);
1665                 kmem_cache_free(dbuf_cache, db);
                     /* Hand the existing dbuf back locked, like a new one. */
1666                 mutex_enter(&odb->db_mtx);
1667                 mutex_exit(&dn->dn_dbufs_mtx);
1668                 return (odb);
1669         }
1670         avl_insert(&dn->dn_dbufs, db, where);
1671         if (db->db_level == 0 && db->db_blkid >=
1672             dn->dn_unlisted_l0_blkid)
1673                 dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
1674         db->db_state = DB_UNCACHED;
1675         mutex_exit(&dn->dn_dbufs_mtx);
1676         arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1677 
             /* A parent other than the dnode's own dbuf gets a hold from db. */
1678         if (parent && parent != dn->dn_dbuf)
1679                 dbuf_add_ref(parent, db);
1680 
1681         ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
1682             refcount_count(&dn->dn_holds) > 0);
             /* Every dbuf in the tree accounts for one dnode hold. */
1683         (void) refcount_add(&dn->dn_holds, db);
1684         atomic_inc_32(&dn->dn_dbufs_count);
1685 
1686         dprintf_dbuf(db, "db=%p\n", db);
1687 
1688         return (db);
1689 }
1690 


1722                  */
1723                 if (db->db_dnode_handle != NULL) {
1724                         dnode_t *dn;
1725 
1726                         DB_DNODE_ENTER(db);
1727                         dn = DB_DNODE(db);
1728                         mutex_enter(&dn->dn_dbufs_mtx);
1729                         avl_remove(&dn->dn_dbufs, db);
1730                         atomic_dec_32(&dn->dn_dbufs_count);
1731                         mutex_exit(&dn->dn_dbufs_mtx);
1732                         DB_DNODE_EXIT(db);
1733                         /*
1734                          * Decrementing the dbuf count means that the hold
1735                          * corresponding to the removed dbuf is no longer
1736                          * discounted in dnode_move(), so the dnode cannot be
1737                          * moved until after we release the hold.
1738                          */
1739                         dnode_rele(dn, db);
1740                         db->db_dnode_handle = NULL;
1741                 }

1742         }
1743         db->db_parent = NULL;
1744         db->db_buf = NULL;
1745 
1746         ASSERT(db->db.db_data == NULL);

1747         ASSERT(db->db_blkptr == NULL);
1748         ASSERT(db->db_data_pending == NULL);
1749 
1750         kmem_cache_free(dbuf_cache, db);
1751         arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1752 }
1753 
1754 void
1755 dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
1756 {
1757         dmu_buf_impl_t *db = NULL;
1758         blkptr_t *bp = NULL;
1759 
1760         ASSERT(blkid != DMU_BONUS_BLKID);
1761         ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1762 
1763         if (dnode_block_freed(dn, blkid))
1764                 return;
1765 
1766         /* dbuf_find() returns with db_mtx held */