aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys
diff options
context:
space:
mode:
authorPawel Jakub Dawidek <pjd@FreeBSD.org>2008-11-17 20:49:29 +0000
committerPawel Jakub Dawidek <pjd@FreeBSD.org>2008-11-17 20:49:29 +0000
commit1ba4a712dde6e6c613fc411a96958b4ade67de4c (patch)
tree81b89fa4ac6467771d5aa291a97f4665981a6108 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys
parent8fc061164d74a4c9775f39da3c0b5d02112866c8 (diff)
downloadsrc-1ba4a712dde6e6c613fc411a96958b4ade67de4c.tar.gz
src-1ba4a712dde6e6c613fc411a96958b4ade67de4c.zip
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This bring huge amount of changes, I'll enumerate only user-visible changes: - Delegated Administration Allows regular users to perform ZFS operations, like file system creation, snapshot creation, etc. - L2ARC Level 2 cache for ZFS - allows to use additional disks for cache. Huge performance improvements mostly for random read of mostly static content. - slog Allow to use additional disks for ZFS Intent Log to speed up operations like fsync(2). - vfs.zfs.super_owner Allows regular users to perform privileged operations on files stored on ZFS file systems owned by him. Very careful with this one. - chflags(2) Not all the flags are supported. This still needs work. - ZFSBoot Support to boot off of ZFS pool. Not finished, AFAIK. Submitted by: dfr - Snapshot properties - New failure modes Before if write requested failed, system paniced. Now one can select from one of three failure modes: - panic - panic on write error - wait - wait for disk to reappear - continue - serve read requests if possible, block write requests - Refquota, refreservation properties Just quota and reservation properties, but don't count space consumed by children file systems, clones and snapshots. - Sparse volumes ZVOLs that don't reserve space in the pool. - External attributes Compatible with extattr(2). - NFSv4-ACLs Not sure about the status, might not be complete yet. Submitted by: trasz - Creation-time properties - Regression tests for zpool(8) command. Obtained from: OpenSolaris
Notes
Notes: svn path=/head/; revision=185029
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h56
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h10
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h25
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h101
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h14
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h25
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h26
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h108
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h73
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h41
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h54
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h13
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h10
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h12
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h79
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h131
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_boot.h45
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h112
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h20
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h13
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h52
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h12
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h43
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h76
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h40
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h22
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h160
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h24
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h10
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h13
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h125
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h72
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h63
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h162
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h144
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h20
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h216
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h172
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h12
45 files changed, 1787 insertions, 653 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
index f58ffc059f91..f3e00877a8e2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ARC_H
#define _SYS_ARC_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zfs_context.h>
#ifdef __cplusplus
@@ -35,11 +33,12 @@ extern "C" {
#endif
#include <sys/zio.h>
+#include <sys/dmu.h>
+#include <sys/spa.h>
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
-typedef void arc_byteswap_func_t(void *buf, size_t size);
typedef int arc_evict_func_t(void *private);
/* generic arc_done_func_t's which you can use */
@@ -49,15 +48,16 @@ arc_done_func_t arc_getbuf_func;
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next;
+ krwlock_t b_lock;
void *b_data;
arc_evict_func_t *b_efunc;
void *b_private;
};
typedef enum arc_buf_contents {
- ARC_BUFC_UNDEF, /* buffer contents undefined */
ARC_BUFC_DATA, /* buffer contains data */
- ARC_BUFC_METADATA /* buffer contains metadata */
+ ARC_BUFC_METADATA, /* buffer contains metadata */
+ ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
/*
* These are the flags we pass into calls to the arc
@@ -66,7 +66,12 @@ typedef enum arc_buf_contents {
#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */
#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */
#define ARC_CACHED (1 << 4) /* I/O was already in cache */
+#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */
+void arc_space_consume(uint64_t space);
+void arc_space_return(uint64_t space);
+void *arc_data_buf_alloc(uint64_t space);
+void arc_data_buf_free(void *buf, uint64_t space);
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
arc_buf_contents_t type);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
@@ -81,13 +86,24 @@ void arc_buf_thaw(arc_buf_t *buf);
int arc_referenced(arc_buf_t *buf);
#endif
-int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_byteswap_func_t *swap,
+typedef struct writeprops {
+ dmu_object_type_t wp_type;
+ uint8_t wp_level;
+ uint8_t wp_copies;
+ uint8_t wp_dncompress, wp_oscompress;
+ uint8_t wp_dnchecksum, wp_oschecksum;
+} writeprops_t;
+
+int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
+ arc_done_func_t *done, void *private, int priority, int zio_flags,
+ uint32_t *arc_flags, const zbookmark_t *zb);
+int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
arc_done_func_t *done, void *private, int priority, int flags,
- uint32_t *arc_flags, zbookmark_t *zb);
-zio_t *arc_write(zio_t *pio, spa_t *spa, int checksum, int compress,
- int ncopies, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
+ uint32_t *arc_flags, const zbookmark_t *zb);
+zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
+ boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
- int flags, zbookmark_t *zb);
+ int zio_flags, const zbookmark_t *zb);
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
@@ -95,13 +111,25 @@ int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);
-void arc_flush(void);
-void arc_tempreserve_clear(uint64_t tempreserve);
-int arc_tempreserve_space(uint64_t tempreserve);
+void arc_flush(spa_t *spa);
+void arc_tempreserve_clear(uint64_t reserve);
+int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
void arc_init(void);
void arc_fini(void);
+/*
+ * Level 2 ARC
+ */
+
+void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
+void l2arc_remove_vdev(vdev_t *vd);
+boolean_t l2arc_vdev_present(vdev_t *vd);
+void l2arc_init(void);
+void l2arc_fini(void);
+void l2arc_start(void);
+void l2arc_stop(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h
index b4c83765c873..cdb93a6c35a3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_BPLIST_H
#define _SYS_BPLIST_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
@@ -75,12 +73,14 @@ extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
extern void bplist_close(bplist_t *bpl);
extern boolean_t bplist_empty(bplist_t *bpl);
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
-extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx);
-extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp);
+extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
+extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
extern int bplist_space(bplist_t *bpl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
+extern int bplist_space_birthrange(bplist_t *bpl,
+ uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
index d33657b9e67c..b27d89fe2162 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -239,7 +239,7 @@ typedef struct dbuf_hash_table {
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
-dmu_buf_impl_t *dbuf_create_bonus(struct dnode *dn);
+void dbuf_create_bonus(struct dnode *dn);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
@@ -271,7 +271,7 @@ void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
-void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks,
+void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
struct dmu_tx *);
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
@@ -279,10 +279,21 @@ void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
void dbuf_init(void);
void dbuf_fini(void);
-#define DBUF_GET_BUFC_TYPE(db) \
- ((((db)->db_level > 0) || \
- (dmu_ot[(db)->db_dnode->dn_type].ot_metadata)) ? \
- ARC_BUFC_METADATA : ARC_BUFC_DATA);
+#define DBUF_IS_METADATA(db) \
+ ((db)->db_level > 0 || dmu_ot[(db)->db_dnode->dn_type].ot_metadata)
+
+#define DBUF_GET_BUFC_TYPE(db) \
+ (DBUF_IS_METADATA(db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
+
+#define DBUF_IS_CACHEABLE(db) \
+ ((db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
+ (DBUF_IS_METADATA(db) && \
+ ((db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
+
+#define DBUF_IS_L2CACHEABLE(db) \
+ ((db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
+ (DBUF_IS_METADATA(db) && \
+ ((db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#ifdef ZFS_DEBUG
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
index 8c2a1fdaa823..4535c6864074 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,6 +38,7 @@
#include <sys/types.h>
#include <sys/param.h>
+#include <sys/cred.h>
#ifdef __cplusplus
extern "C" {
@@ -91,7 +92,7 @@ typedef enum dmu_object_type {
DMU_OT_DSL_DATASET, /* UINT64 */
/* zpl: */
DMU_OT_ZNODE, /* ZNODE */
- DMU_OT_ACL, /* ACL */
+ DMU_OT_OLDACL, /* Old ACL */
DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */
DMU_OT_DIRECTORY_CONTENTS, /* ZAP */
DMU_OT_MASTER_NODE, /* ZAP */
@@ -108,7 +109,13 @@ typedef enum dmu_object_type {
DMU_OT_SPA_HISTORY, /* UINT8 */
DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */
DMU_OT_POOL_PROPS, /* ZAP */
-
+ DMU_OT_DSL_PERMS, /* ZAP */
+ DMU_OT_ACL, /* ACL */
+ DMU_OT_SYSACL, /* SYSACL */
+ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
+ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
+ DMU_OT_NEXT_CLONES, /* ZAP */
+ DMU_OT_SCRUB_QUEUE, /* ZAP */
DMU_OT_NUMTYPES
} dmu_object_type_t;
@@ -127,15 +134,15 @@ void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
void byteswap_uint8_array(void *buf, size_t size);
void zap_byteswap(void *buf, size_t size);
+void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
-#define DS_MODE_NONE 0 /* invalid, to aid debugging */
-#define DS_MODE_STANDARD 1 /* normal access, no special needs */
-#define DS_MODE_PRIMARY 2 /* the "main" access, e.g. a mount */
-#define DS_MODE_EXCLUSIVE 3 /* exclusive access, e.g. to destroy */
-#define DS_MODE_LEVELS 4
-#define DS_MODE_LEVEL(x) ((x) & (DS_MODE_LEVELS - 1))
+#define DS_MODE_NOHOLD 0 /* internal use only */
+#define DS_MODE_USER 1 /* simple access, no special needs */
+#define DS_MODE_OWNER 2 /* the "main" access, e.g. a mount */
+#define DS_MODE_TYPE_MASK 0x3
+#define DS_MODE_TYPE(x) ((x) & DS_MODE_TYPE_MASK)
#define DS_MODE_READONLY 0x8
#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY)
#define DS_MODE_INCONSISTENT 0x10
@@ -149,20 +156,23 @@ void zfs_znode_byteswap(void *buf, size_t size);
* operation, including metadata.
*/
#define DMU_MAX_ACCESS (10<<20) /* 10MB */
+#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
/*
* Public routines to create, destroy, open, and close objsets.
*/
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
+int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
+ objset_t **osp);
void dmu_objset_close(objset_t *os);
-int dmu_objset_evict_dbufs(objset_t *os, int try);
+int dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent,
- void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg);
+ objset_t *clone_parent, uint64_t flags,
+ void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
int dmu_snapshots_destroy(char *fsname, char *snapname);
-int dmu_objset_rollback(const char *name);
+int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
@@ -180,11 +190,6 @@ typedef struct dmu_buf {
typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
/*
- * Callback function to perform byte swapping on a block.
- */
-typedef void dmu_byteswap_func_t(void *buf, size_t size);
-
-/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
@@ -197,6 +202,20 @@ typedef void dmu_byteswap_func_t(void *buf, size_t size);
#define DMU_POOL_DEFLATE "deflate"
#define DMU_POOL_HISTORY "history"
#define DMU_POOL_PROPS "pool_props"
+#define DMU_POOL_L2CACHE "l2cache"
+
+/* 4x8 zbookmark_t */
+#define DMU_POOL_SCRUB_BOOKMARK "scrub_bookmark"
+/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
+#define DMU_POOL_SCRUB_QUEUE "scrub_queue"
+/* 1x8 txg */
+#define DMU_POOL_SCRUB_MIN_TXG "scrub_min_txg"
+/* 1x8 txg */
+#define DMU_POOL_SCRUB_MAX_TXG "scrub_max_txg"
+/* 1x4 enum scrub_func */
+#define DMU_POOL_SCRUB_FUNC "scrub_func"
+/* 1x8 count */
+#define DMU_POOL_SCRUB_ERRORS "scrub_errors"
/*
* Allocate an object from this objset. The range of object numbers
@@ -298,6 +317,7 @@ int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
*/
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
+int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
/*
* Obtain the DMU buffer from the specified object which contains the
@@ -417,6 +437,9 @@ void dmu_tx_commit(dmu_tx_t *tx);
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx);
+int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t size);
+int dmu_free_object(objset_t *os, uint64_t object);
/*
* Convenience functions.
@@ -458,8 +481,10 @@ typedef struct dmu_object_info {
uint64_t doi_max_block_offset;
} dmu_object_info_t;
+typedef void arc_byteswap_func_t(void *buf, size_t size);
+
typedef struct dmu_object_type_info {
- dmu_byteswap_func_t *ot_byteswap;
+ arc_byteswap_func_t *ot_byteswap;
boolean_t ot_metadata;
char *ot_name;
} dmu_object_type_info_t;
@@ -482,10 +507,11 @@ void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
typedef struct dmu_objset_stats {
uint64_t dds_num_clones; /* number of clones of this */
uint64_t dds_creation_txg;
+ uint64_t dds_guid;
dmu_objset_type_t dds_type;
uint8_t dds_is_snapshot;
uint8_t dds_inconsistent;
- char dds_clone_of[MAXNAMELEN];
+ char dds_origin[MAXNAMELEN];
} dmu_objset_stats_t;
/*
@@ -531,9 +557,13 @@ extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
- uint64_t *id, uint64_t *offp);
+ uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
+extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
+ int maxlen, boolean_t *conflict);
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
+extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
+extern void *dmu_objset_get_user(objset_t *os);
/*
* Return the txg number for the given assigned transaction.
@@ -544,7 +574,7 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
* Synchronous write.
* If a parent zio is provided this function initiates a write on the
* provided buffer as a child of the parent zio.
- * In the absense of a parent zio, the write is completed synchronously.
+ * In the absence of a parent zio, the write is completed synchronously.
* At write completion, blk is filled with the bp of the written block.
* Note that while the data covered by this function will be on stable
* storage when the write completes this new data does not become a
@@ -572,9 +602,30 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
-int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, struct file *fp);
-int dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
- boolean_t force, struct file *fp, uint64_t voffset);
+int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+ struct file *fp, offset_t *off);
+
+typedef struct dmu_recv_cookie {
+ /*
+ * This structure is opaque!
+ *
+ * If logical and real are different, we are recving the stream
+ * into the "real" temporary clone, and then switching it with
+ * the "logical" target.
+ */
+ struct dsl_dataset *drc_logical_ds;
+ struct dsl_dataset *drc_real_ds;
+ struct drr_begin *drc_drrb;
+ char *drc_tosnap;
+ boolean_t drc_newfs;
+ boolean_t drc_force;
+} dmu_recv_cookie_t;
+
+int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
+ boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
+int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp);
+int dmu_recv_end(dmu_recv_cookie_t *drc);
+void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
index 807011e94ffc..96ce688e1551 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_IMPL_H
#define _SYS_DMU_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/txg_impl.h>
#include <sys/zio.h>
#include <sys/dnode.h>
@@ -51,7 +49,7 @@ extern "C" {
* XXX try to improve evicting path?
*
* dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
- * dn_dbufs_mtx > hash_mutexes > db_mtx > leafs
+ * dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leafs
*
* dp_config_rwlock
* must be held before: everything
@@ -177,7 +175,10 @@ extern "C" {
* dmu_tx_try_assign: dn_notxholds(cv)
* dmu_tx_unassign: none
*
- * dd_lock (leaf)
+ * dd_lock
+ * must be held before:
+ * ds_lock
+ * ancestors' dd_lock
* protects:
* dd_prop_cbs
* dd_sync_*
@@ -207,13 +208,14 @@ extern "C" {
* dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
* dnode_free: none (dn_dirtyblksz, os_*_dnodes)
*
- * ds_lock (leaf)
+ * ds_lock
* protects:
* ds_user_ptr
* ds_user_evice_func
* ds_open_refcount
* ds_snapname
* ds_phys accounting
+ * ds_reserved
* held from:
* dsl_dataset_*
*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
index 8293a3b4076a..15df29a17799 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -69,12 +69,13 @@ typedef struct objset_impl {
uint8_t os_checksum; /* can change, under dsl_dir's locks */
uint8_t os_compress; /* can change, under dsl_dir's locks */
uint8_t os_copies; /* can change, under dsl_dir's locks */
- uint8_t os_md_checksum;
- uint8_t os_md_compress;
+ uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
+ uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
blkptr_t *os_rootbp;
+ zil_header_t os_zil_header;
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
@@ -86,19 +87,27 @@ typedef struct objset_impl {
list_t os_free_dnodes[TXG_SIZE];
list_t os_dnodes;
list_t os_downgraded_dbufs;
+
+ /* stuff we store for the user */
+ kmutex_t os_user_ptr_lock;
+ void *os_user_ptr;
} objset_impl_t;
#define DMU_META_DNODE_OBJECT 0
+#define DMU_OS_IS_L2CACHEABLE(os) \
+ ((os)->os_secondary_cache == ZFS_CACHE_ALL || \
+ (os)->os_secondary_cache == ZFS_CACHE_METADATA)
+
/* called from zpl */
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent,
- void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg);
+ objset_t *clone_parent, uint64_t flags,
+ void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
-int dmu_objset_rollback(const char *name);
+int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
@@ -107,8 +116,10 @@ void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int flags);
+int dmu_objset_find_spa(spa_t *spa, const char *name,
+ int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
void dmu_objset_byteswap(void *buf, size_t size);
-int dmu_objset_evict_dbufs(objset_t *os, int try);
+int dmu_objset_evict_dbufs(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
index ea9fa6c1e36c..05e5ffdbff5d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -100,6 +100,7 @@ struct traverse_handle {
int traverse_dsl_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int advance, blkptr_cb_t func, void *arg);
+int traverse_zvol(objset_t *os, int advance, blkptr_cb_t func, void *arg);
traverse_handle_t *traverse_init(spa_t *spa, blkptr_cb_t *func, void *arg,
int advance, int zio_flags);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
index 89f4799b57fe..6aaf35dc038f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -64,6 +64,7 @@ struct dmu_tx {
uint64_t tx_space_towrite;
uint64_t tx_space_tofree;
uint64_t tx_space_tooverwrite;
+ uint64_t tx_space_tounref;
refcount_t tx_space_written;
refcount_t tx_space_freed;
#endif
@@ -86,6 +87,9 @@ typedef struct dmu_tx_hold {
uint64_t txh_space_towrite;
uint64_t txh_space_tofree;
uint64_t txh_space_tooverwrite;
+ uint64_t txh_space_tounref;
+ uint64_t txh_memory_tohold;
+ uint64_t txh_fudge;
#ifdef ZFS_DEBUG
enum dmu_tx_hold_type txh_type;
uint64_t txh_arg1;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
index 327e538cf809..c79ff48a60c5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DNODE_H
#define _SYS_DNODE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/spa.h>
@@ -41,12 +39,19 @@ extern "C" {
#endif
/*
- * Flags.
+ * dnode_hold() flags.
*/
#define DNODE_MUST_BE_ALLOCATED 1
#define DNODE_MUST_BE_FREE 2
/*
+ * dnode_next_offset() flags.
+ */
+#define DNODE_FIND_HOLE 1
+#define DNODE_FIND_BACKWARDS 2
+#define DNODE_FIND_HAVELOCK 4
+
+/*
* Fixed constants.
*/
#define DNODE_SHIFT 9 /* 512 bytes */
@@ -64,6 +69,7 @@ extern "C" {
#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
+#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
@@ -156,6 +162,7 @@ typedef struct dnode {
uint64_t dn_maxblkid;
uint8_t dn_next_nlevels[TXG_SIZE];
uint8_t dn_next_indblkshift[TXG_SIZE];
+ uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
/* protected by os_lock: */
@@ -197,11 +204,12 @@ dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
uint64_t object);
void dnode_special_close(dnode_t *dn);
+void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
int dnode_hold(struct objset_impl *dd, uint64_t object,
void *ref, dnode_t **dnp);
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
-void dnode_add_ref(dnode_t *dn, void *ref);
+boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
@@ -220,13 +228,13 @@ void dnode_clear_range(dnode_t *dn, uint64_t blkid,
uint64_t nblks, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
-void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx);
+void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
void dnode_init(void);
void dnode_fini(void);
-int dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *off, int minlvl,
- uint64_t blkfill, uint64_t txg);
-int dnode_evict_dbufs(dnode_t *dn, int try);
+int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
+ int minlvl, uint64_t blkfill, uint64_t txg);
+void dnode_evict_dbufs(dnode_t *dn);
#ifdef ZFS_DEBUG
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
index 8cfc1dcc9840..8665aec2dda8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DATASET_H
#define _SYS_DSL_DATASET_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
@@ -47,6 +45,8 @@ struct dsl_pool;
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
#define DS_FLAG_INCONSISTENT (1ULL<<0)
+#define DS_IS_INCONSISTENT(ds) \
+ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
/*
* NB: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later. It
@@ -55,16 +55,29 @@ typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
+/*
+ * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
+ * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
+ * refquota/refreservations).
+ */
+#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
+
+/*
+ * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
+ * name lookups should be performed case-insensitively.
+ */
+#define DS_FLAG_CI_DATASET (1ULL<<16)
+
typedef struct dsl_dataset_phys {
- uint64_t ds_dir_obj;
- uint64_t ds_prev_snap_obj;
+ uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
+ uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
uint64_t ds_prev_snap_txg;
- uint64_t ds_next_snap_obj;
- uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */
+ uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */
+ uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
- uint64_t ds_deadlist_obj;
+ uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
uint64_t ds_used_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
@@ -76,9 +89,11 @@ typedef struct dsl_dataset_phys {
*/
uint64_t ds_fsid_guid;
uint64_t ds_guid;
- uint64_t ds_flags;
+ uint64_t ds_flags; /* DS_FLAG_* */
blkptr_t ds_bp;
- uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */
+ uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */
+ uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */
+ uint64_t ds_pad[6]; /* pad out to 320 bytes for good measure */
} dsl_dataset_phys_t;
typedef struct dsl_dataset {
@@ -87,9 +102,11 @@ typedef struct dsl_dataset {
dsl_dataset_phys_t *ds_phys;
dmu_buf_t *ds_dbuf;
uint64_t ds_object;
+ uint64_t ds_fsid_guid;
- /* only used in syncing context: */
- struct dsl_dataset *ds_prev; /* only valid for non-snapshots */
+ /* only used in syncing context, only valid for non-snapshots: */
+ struct dsl_dataset *ds_prev;
+ uint64_t ds_origin_txg;
/* has internal locking: */
bplist_t ds_deadlist;
@@ -105,11 +122,23 @@ typedef struct dsl_dataset {
kmutex_t ds_lock;
void *ds_user_ptr;
dsl_dataset_evict_func_t *ds_user_evict_func;
- uint64_t ds_open_refcount;
+
+ /*
+ * ds_owner is protected by the ds_rwlock and the ds_lock
+ */
+ krwlock_t ds_rwlock;
+ kcondvar_t ds_exclusive_cv;
+ void *ds_owner;
/* no locking; only for making guesses */
uint64_t ds_trysnap_txg;
+ /* for objset_open() */
+ kmutex_t ds_opening_lock;
+
+ uint64_t ds_reserved; /* cached refreservation */
+ uint64_t ds_quota; /* cached refquota */
+
/* Protected by ds_lock; keep at end of struct for better locality */
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
@@ -117,23 +146,38 @@ typedef struct dsl_dataset {
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
-int dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
- void *tag, dsl_dataset_t **dsp);
-int dsl_dataset_open(const char *name, int mode, void *tag,
+#define DS_UNIQUE_IS_ACCURATE(ds) \
+ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
+
+int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
+int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
+ void *tag, dsl_dataset_t **);
+int dsl_dataset_own(const char *name, int flags, void *owner,
dsl_dataset_t **dsp);
-int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj,
- const char *tail, int mode, void *tag, dsl_dataset_t **);
+int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
+ int flags, void *owner, dsl_dataset_t **);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag);
-uint64_t dsl_dataset_create_sync(dsl_dir_t *pds,
- const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx);
-int dsl_dataset_destroy(const char *name);
+void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
+void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
+ void *owner);
+void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
+uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
+ dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
+uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
+ uint64_t flags, dmu_tx_t *tx);
+int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
int dsl_snapshots_destroy(char *fsname, char *snapname);
+dsl_checkfunc_t dsl_dataset_destroy_check;
+dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
-int dsl_dataset_rollback(dsl_dataset_t *ds);
+int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name);
+int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
+ boolean_t force);
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func);
@@ -144,10 +188,12 @@ void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
+boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
+
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
-void dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
+int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
dmu_tx_t *tx);
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
@@ -160,11 +206,19 @@ void dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
-void dsl_dataset_create_root(struct dsl_pool *dp, uint64_t *ddobjp,
- dmu_tx_t *tx);
-
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
+int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
+ uint64_t asize, uint64_t inflight, uint64_t *used,
+ uint64_t *ref_rsrv);
+int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
+void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
+ dmu_tx_t *tx);
+int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
+void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
+int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
+ dmu_tx_t *tx);
+
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
new file mode 100644
index 000000000000..a29e44e67d0c
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DSL_DELEG_H
+#define _SYS_DSL_DELEG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/dmu.h>
+#include <sys/dsl_pool.h>
+#include <sys/zfs_context.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZFS_DELEG_PERM_NONE ""
+#define ZFS_DELEG_PERM_CREATE "create"
+#define ZFS_DELEG_PERM_DESTROY "destroy"
+#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
+#define ZFS_DELEG_PERM_ROLLBACK "rollback"
+#define ZFS_DELEG_PERM_CLONE "clone"
+#define ZFS_DELEG_PERM_PROMOTE "promote"
+#define ZFS_DELEG_PERM_RENAME "rename"
+#define ZFS_DELEG_PERM_MOUNT "mount"
+#define ZFS_DELEG_PERM_SHARE "share"
+#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_RECEIVE "receive"
+#define ZFS_DELEG_PERM_ALLOW "allow"
+#define ZFS_DELEG_PERM_USERPROP "userprop"
+#define ZFS_DELEG_PERM_VSCAN "vscan"
+
+/*
+ * Note: the names of properties that are marked delegatable are also
+ * valid delegated permissions
+ */
+
+int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
+int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
+int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
+void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
+int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
+int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
+int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
+boolean_t dsl_delegation_on(objset_t *os);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DSL_DELEG_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
index e0595d3c368b..86b9636ceaab 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DIR_H
#define _SYS_DSL_DIR_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
@@ -40,11 +38,22 @@ extern "C" {
struct dsl_dataset;
+typedef enum dd_used {
+ DD_USED_HEAD,
+ DD_USED_SNAP,
+ DD_USED_CHILD,
+ DD_USED_CHILD_RSRV,
+ DD_USED_REFRSRV,
+ DD_USED_NUM
+} dd_used_t;
+
+#define DD_FLAG_USED_BREAKDOWN (1<<0)
+
typedef struct dsl_dir_phys {
uint64_t dd_creation_time; /* not actually used */
uint64_t dd_head_dataset_obj;
uint64_t dd_parent_obj;
- uint64_t dd_clone_parent_obj;
+ uint64_t dd_origin_obj;
uint64_t dd_child_dir_zapobj;
/*
* how much space our children are accounting for; for leaf
@@ -58,7 +67,10 @@ typedef struct dsl_dir_phys {
/* Administrative reservation setting */
uint64_t dd_reserved;
uint64_t dd_props_zapobj;
- uint64_t dd_pad[21]; /* pad out to 256 bytes for good measure */
+ uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
+ uint64_t dd_flags;
+ uint64_t dd_used_breakdown[DD_USED_NUM];
+ uint64_t dd_pad[14]; /* pad out to 256 bytes for good measure */
} dsl_dir_phys_t;
struct dsl_dir {
@@ -78,9 +90,6 @@ struct dsl_dir {
kmutex_t dd_lock;
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
- /* Accounting */
- /* reflects any changes to dd_phys->dd_used_bytes made this syncing */
- int64_t dd_used_bytes;
/* gross estimate of space used by in-flight tx's */
uint64_t dd_tempreserved[TXG_SIZE];
/* amount of space we expect to write; == amount of dirty data */
@@ -99,8 +108,8 @@ int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
int dsl_dir_is_private(dsl_dir_t *dd);
-uint64_t dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx);
-void dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx);
+uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
+ const char *name, dmu_tx_t *tx);
dsl_checkfunc_t dsl_dir_destroy_check;
dsl_syncfunc_t dsl_dir_destroy_sync;
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
@@ -109,18 +118,26 @@ uint64_t dsl_dir_space_available(dsl_dir_t *dd,
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
- uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx);
+ uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
+ dmu_tx_t *tx);
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
-void dsl_dir_diduse_space(dsl_dir_t *dd,
+void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
+void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
+ dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
+int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
+boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
+void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
+ uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
+#define ORIGIN_DIR_NAME "$ORIGIN"
#ifdef ZFS_DEBUG
#define dprintf_dd(dd, fmt, ...) do { \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
index f7ec67a0e062..4dd88fe6fa55 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
@@ -19,19 +19,18 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_POOL_H
#define _SYS_DSL_POOL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/txg_impl.h>
#include <sys/zfs_context.h>
+#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
@@ -39,6 +38,16 @@ extern "C" {
struct objset;
struct dsl_dir;
+struct dsl_dataset;
+struct dsl_pool;
+struct dmu_tx;
+
+enum scrub_func {
+ SCRUB_FUNC_NONE,
+ SCRUB_FUNC_CLEAN,
+ SCRUB_FUNC_NUMFUNCS
+};
+
typedef struct dsl_pool {
/* Immutable */
@@ -46,11 +55,31 @@ typedef struct dsl_pool {
struct objset *dp_meta_objset;
struct dsl_dir *dp_root_dir;
struct dsl_dir *dp_mos_dir;
+ struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj;
/* No lock needed - sync context only */
blkptr_t dp_meta_rootbp;
- list_t dp_synced_objsets;
+ list_t dp_synced_datasets;
+ hrtime_t dp_read_overhead;
+ uint64_t dp_throughput;
+ uint64_t dp_write_limit;
+
+ /* Uses dp_lock */
+ kmutex_t dp_lock;
+ uint64_t dp_space_towrite[TXG_SIZE];
+ uint64_t dp_tempreserved[TXG_SIZE];
+
+ enum scrub_func dp_scrub_func;
+ uint64_t dp_scrub_queue_obj;
+ uint64_t dp_scrub_min_txg;
+ uint64_t dp_scrub_max_txg;
+ zbookmark_t dp_scrub_bookmark;
+ boolean_t dp_scrub_pausing;
+ boolean_t dp_scrub_isresilver;
+ uint64_t dp_scrub_start_time;
+ kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
+ boolean_t dp_scrub_restart;
/* Has its own locking */
tx_state_t dp_tx;
@@ -69,11 +98,26 @@ typedef struct dsl_pool {
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
void dsl_pool_close(dsl_pool_t *dp);
-dsl_pool_t *dsl_pool_create(spa_t *spa, uint64_t txg);
+dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_zil_clean(dsl_pool_t *dp);
int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
+int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
+void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
+void dsl_pool_memory_pressure(dsl_pool_t *dp);
+void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
+int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
+ zio_done_func_t *done, void *private, uint32_t arc_flags);
+void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
+void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
+void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
+void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
+
+int dsl_pool_scrub_cancel(dsl_pool_t *dp);
+int dsl_pool_scrub_clean(dsl_pool_t *dp);
+void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
+void dsl_pool_scrub_restart(dsl_pool_t *dp);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
index d2debff8b8c0..d66caa86cff6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,6 +37,7 @@ extern "C" {
#endif
struct dsl_dataset;
+struct dsl_dir;
/* The callback func may not call into the DMU or DSL! */
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
@@ -59,12 +60,16 @@ int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
-int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
+int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
+int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
+ int intsz, int numints, void *buf, char *setpoint);
+int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
+ int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_set(const char *ddname, const char *propname,
int intsz, int numints, const void *buf);
-int dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
- int intsz, int numints, const void *buf);
+void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+ cred_t *cr, dmu_tx_t *tx);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
index e695b182f74b..4995bfe5acca 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,7 +38,7 @@ extern "C" {
struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
-typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
+typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
typedef struct dsl_sync_task {
list_node_t dst_node;
@@ -53,9 +53,11 @@ typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
+ cred_t *dstg_cr;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;
+ boolean_t dstg_nowaiter;
} dsl_sync_task_group_t;
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
@@ -63,12 +65,16 @@ void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *, dsl_syncfunc_t *,
void *arg1, void *arg2, int blocks_modified);
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
+void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
int dsl_sync_task_do(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified);
+void dsl_sync_task_do_nowait(struct dsl_pool *dp,
+ dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
+ void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
index 095dd3ce2464..1c9d89e8fd69 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_METASLAB_H
#define _SYS_METASLAB_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/spa.h>
#include <sys/space_map.h>
#include <sys/txg.h>
@@ -47,8 +45,12 @@ extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
-extern int metaslab_alloc(spa_t *spa, uint64_t psize, blkptr_t *bp,
- int ncopies, uint64_t txg, blkptr_t *hintbp, boolean_t hintbp_avoid);
+#define METASLAB_HINTBP_FAVOR 0x0
+#define METASLAB_HINTBP_AVOID 0x1
+#define METASLAB_GANG_HEADER 0x2
+
+extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
+ blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
index c64c6627f783..e84b1bf65f99 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -28,6 +28,8 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/cdefs.h>
+#include <sys/types.h>
#include_next <sys/refcount.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
@@ -59,7 +61,7 @@ typedef struct refcount {
int64_t rc_removed_count;
} refcount_t;
-/* Note: refcount_t should be initialized to zero before use. */
+/* Note: refcount_t must be initialized with refcount_create() */
void refcount_create(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
new file mode 100644
index 000000000000..760fc822db56
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_RR_RW_LOCK_H
+#define _SYS_RR_RW_LOCK_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/refcount.h>
+
+/*
+ * A reader-writer lock implementation that allows re-entrant reads, but
+ * still gives writers priority on "new" reads.
+ *
+ * See rrwlock.c for more details about the implementation.
+ *
+ * Fields of the rrwlock_t structure:
+ * - rr_lock: protects modification and reading of rrwlock_t fields
+ * - rr_cv: cv for waking up readers or waiting writers
+ * - rr_writer: thread id of the current writer
+ * - rr_anon_rount: number of active anonymous readers
+ * - rr_linked_rcount: total number of non-anonymous active readers
+ * - rr_writer_wanted: a writer wants the lock
+ */
+typedef struct rrwlock {
+ kmutex_t rr_lock;
+ kcondvar_t rr_cv;
+ kthread_t *rr_writer;
+ refcount_t rr_anon_rcount;
+ refcount_t rr_linked_rcount;
+ boolean_t rr_writer_wanted;
+} rrwlock_t;
+
+/*
+ * 'tag' is used in reference counting tracking. The
+ * 'tag' must be the same in a rrw_enter() as in its
+ * corresponding rrw_exit().
+ */
+void rrw_init(rrwlock_t *rrl);
+void rrw_destroy(rrwlock_t *rrl);
+void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
+void rrw_exit(rrwlock_t *rrl, void *tag);
+boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
+
+#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
+#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_RR_RW_LOCK_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
index f0eb2e171aad..99bcb915911e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_H
#define _SYS_SPA_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/avl.h>
#include <sys/zfs_context.h>
#include <sys/nvpair.h>
@@ -47,6 +45,7 @@ typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct traverse_handle traverse_handle_t;
+typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;
/*
@@ -88,6 +87,11 @@ struct dsl_pool;
#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
/*
+ * Size of block to hold the configuration data (a packed nvlist)
+ */
+#define SPA_CONFIG_BLOCKSIZE (1 << 14)
+
+/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
* The ASIZE encoding should be at least 64 times larger (6 more bits)
* to support up to 4-way RAID-Z mirror mode with worst-case gang block
@@ -258,7 +262,6 @@ typedef struct blkptr {
((zc1).zc_word[2] - (zc2).zc_word[2]) | \
((zc1).zc_word[3] - (zc2).zc_word[3])))
-
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
@@ -291,6 +294,8 @@ typedef struct blkptr {
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
+#define BLK_FILL_ALREADY_FREED (-1ULL)
+
/*
* Note: the byteorder is either 0 or -1, both of which are palindromes.
* This simplifies the endianness handling a bit.
@@ -318,23 +323,30 @@ typedef struct blkptr {
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_get_stats(const char *pool, nvlist_t **config,
char *altroot, size_t buflen);
-extern int spa_create(const char *pool, nvlist_t *config, const char *altroot);
-extern int spa_import(const char *pool, nvlist_t *config, const char *altroot);
+extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
+ const char *history_str, nvlist_t *zplprops);
+extern int spa_check_rootconf(char *devpath, char *devid,
+ nvlist_t **bestconf, uint64_t *besttxg);
+extern boolean_t spa_rootdev_validate(nvlist_t *nv);
+extern int spa_import_rootpool(char *devpath, char *devid);
+extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
+extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
extern int spa_destroy(char *pool);
-extern int spa_export(char *pool, nvlist_t **oldconfig);
+extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force);
extern int spa_reset(char *pool);
extern void spa_async_request(spa_t *spa, int flag);
+extern void spa_async_unrequest(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
-#define SPA_ASYNC_REOPEN 0x01
-#define SPA_ASYNC_REPLACE_DONE 0x02
-#define SPA_ASYNC_SCRUB 0x04
-#define SPA_ASYNC_RESILVER 0x08
-#define SPA_ASYNC_CONFIG_UPDATE 0x10
+#define SPA_ASYNC_CONFIG_UPDATE 0x01
+#define SPA_ASYNC_REMOVE 0x02
+#define SPA_ASYNC_PROBE 0x04
+#define SPA_ASYNC_RESILVER_DONE 0x08
+#define SPA_ASYNC_RESILVER 0x10
/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
@@ -347,19 +359,27 @@ extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
extern void spa_spare_remove(vdev_t *vd);
-extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool);
+extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
extern void spa_spare_activate(vdev_t *vd);
+/* L2ARC state (which is global across all pools) */
+extern void spa_l2cache_add(vdev_t *vd);
+extern void spa_l2cache_remove(vdev_t *vd);
+extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
+extern void spa_l2cache_activate(vdev_t *vd);
+extern void spa_l2cache_drop(spa_t *spa);
+extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
+
/* scrubbing */
-extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
-extern void spa_scrub_suspend(spa_t *spa);
-extern void spa_scrub_resume(spa_t *spa);
-extern void spa_scrub_restart(spa_t *spa, uint64_t txg);
+extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
+/* spa namespace global mutex */
+extern kmutex_t spa_namespace_lock;
+
/*
* SPA configuration functions in spa_config.c
*/
@@ -367,13 +387,14 @@ extern void spa_sync_allpools(void);
#define SPA_CONFIG_UPDATE_POOL 0
#define SPA_CONFIG_UPDATE_VDEVS 1
-extern void spa_config_sync(void);
+extern void spa_config_sync(spa_t *, boolean_t, boolean_t);
extern void spa_config_load(void);
extern nvlist_t *spa_all_configs(uint64_t *);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern void spa_config_update(spa_t *spa, int what);
+extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/*
* Miscellaneous SPA routines in spa_misc.c
@@ -390,18 +411,34 @@ extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
-/* Pool configuration lock */
-extern void spa_config_enter(spa_t *spa, krw_t rw, void *tag);
-extern void spa_config_exit(spa_t *spa, void *tag);
-extern boolean_t spa_config_held(spa_t *spa, krw_t rw);
+#define SCL_CONFIG 0x01
+#define SCL_STATE 0x02
+#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
+#define SCL_ALLOC 0x08
+#define SCL_ZIO 0x10
+#define SCL_FREE 0x20
+#define SCL_VDEV 0x40
+#define SCL_LOCKS 7
+#define SCL_ALL ((1 << SCL_LOCKS) - 1)
+#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)
+
+/* Pool configuration locks */
+extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
+extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
+extern void spa_config_exit(spa_t *spa, int locks, void *tag);
+extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
/* Pool vdev add/remove lock */
extern uint64_t spa_vdev_enter(spa_t *spa);
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
+/* Pool vdev state change lock */
+extern void spa_vdev_state_enter(spa_t *spa);
+extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
+
/* Accessor functions */
extern krwlock_t *spa_traverse_rwlock(spa_t *spa);
-extern int spa_traverse_wanted(spa_t *spa);
+extern boolean_t spa_traverse_wanted(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
@@ -414,8 +451,6 @@ extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern int spa_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
-struct metaslab_class;
-extern struct metaslab_class *spa_metaslab_class_select(spa_t *spa);
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);
extern uint64_t spa_get_dspace(spa_t *spa);
@@ -423,6 +458,8 @@ extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_version(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_busy(void);
+extern uint8_t spa_get_failmode(spa_t *spa);
+extern boolean_t spa_suspended(spa_t *spa);
/* Miscellaneous support routines */
extern int spa_rename(const char *oldname, const char *newname);
@@ -432,18 +469,38 @@ extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
-extern void spa_upgrade(spa_t *spa);
+extern void spa_upgrade(spa_t *spa, uint64_t version);
extern void spa_evict_all(void);
-extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid);
+extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
+ boolean_t l2cache);
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
+extern boolean_t spa_has_slogs(spa_t *spa);
+extern boolean_t spa_is_root(spa_t *spa);
/* history logging */
+typedef enum history_log_type {
+ LOG_CMD_POOL_CREATE,
+ LOG_CMD_NORMAL,
+ LOG_INTERNAL
+} history_log_type_t;
+
+typedef struct history_arg {
+ const char *ha_history_str;
+ history_log_type_t ha_log_type;
+ history_internal_events_t ha_event;
+ char ha_zone[MAXPATHLEN];
+} history_arg_t;
+
+extern char *spa_his_ievent_table[];
+
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
extern int spa_history_log(spa_t *spa, const char *his_buf,
- uint64_t pool_create);
+ history_log_type_t what);
+void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
+ dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
/* error handling */
struct zbookmark;
@@ -451,7 +508,8 @@ struct zio;
extern void spa_log_error(spa_t *spa, struct zio *zio);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t stateoroffset, uint64_t length);
-extern void zfs_post_ok(spa_t *spa, vdev_t *vd);
+extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
+extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern void spa_errlog_rotate(spa_t *spa);
@@ -459,15 +517,22 @@ extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
+/* vdev cache */
+extern void vdev_cache_stat_init(void);
+extern void vdev_cache_stat_fini(void);
+
/* Initialization and termination */
extern void spa_init(int flags);
extern void spa_fini(void);
+extern void spa_boot_init();
/* properties */
-extern int spa_set_props(spa_t *spa, nvlist_t *nvp);
-extern int spa_get_props(spa_t *spa, nvlist_t **nvp);
-extern void spa_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
-extern boolean_t spa_has_bootfs(spa_t *spa);
+extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
+extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
+extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
+
+/* asynchronous event notification */
+extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_boot.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_boot.h
new file mode 100644
index 000000000000..b56073b97516
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_boot.h
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SPA_BOOT_H
+#define _SYS_SPA_BOOT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/nvpair.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern char *spa_get_bootprop(char *prop);
+extern void spa_free_bootprop(char *prop);
+extern int spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf_p);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SPA_BOOT_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index 8c57123ad4b8..ab41ba605c6a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_IMPL_H
#define _SYS_SPA_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/spa.h>
#include <sys/vdev.h>
#include <sys/metaslab.h>
@@ -43,13 +41,6 @@
extern "C" {
#endif
-typedef struct spa_config_lock {
- kmutex_t scl_lock;
- refcount_t scl_count;
- kthread_t *scl_writer;
- kcondvar_t scl_cv;
-} spa_config_lock_t;
-
typedef struct spa_error_entry {
zbookmark_t se_bookmark;
char *se_name;
@@ -64,31 +55,61 @@ typedef struct spa_history_phys {
uint64_t sh_records_lost; /* num of records overwritten */
} spa_history_phys_t;
-typedef struct spa_props {
- nvlist_t *spa_props_nvp;
- list_node_t spa_list_node;
-} spa_props_t;
+struct spa_aux_vdev {
+ uint64_t sav_object; /* MOS object for device list */
+ nvlist_t *sav_config; /* cached device config */
+ vdev_t **sav_vdevs; /* devices */
+ int sav_count; /* number devices */
+ boolean_t sav_sync; /* sync the device list */
+ nvlist_t **sav_pending; /* pending device additions */
+ uint_t sav_npending; /* # pending devices */
+};
+
+typedef struct spa_config_lock {
+ kmutex_t scl_lock;
+ kthread_t *scl_writer;
+ int scl_write_wanted;
+ kcondvar_t scl_cv;
+ refcount_t scl_count;
+} spa_config_lock_t;
+
+typedef struct spa_config_dirent {
+ list_node_t scd_link;
+ char *scd_path;
+} spa_config_dirent_t;
+
+typedef enum spa_log_state {
+ SPA_LOG_UNKNOWN = 0, /* unknown log state */
+ SPA_LOG_MISSING, /* missing log(s) */
+ SPA_LOG_CLEAR, /* clear the log(s) */
+ SPA_LOG_GOOD, /* log(s) are good */
+} spa_log_state_t;
+
+enum zio_taskq_type {
+ ZIO_TASKQ_ISSUE = 0,
+ ZIO_TASKQ_INTERRUPT,
+ ZIO_TASKQ_TYPES
+};
struct spa {
/*
* Fields protected by spa_namespace_lock.
*/
- char *spa_name; /* pool name */
+ char spa_name[MAXNAMELEN]; /* pool name */
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
uint64_t spa_config_txg; /* txg of last config change */
- kmutex_t spa_config_cache_lock; /* for spa_config RW_READER */
int spa_sync_pass; /* iterate-to-convergence */
int spa_state; /* pool state */
int spa_inject_ref; /* injection references */
uint8_t spa_traverse_wanted; /* traverse lock wanted */
uint8_t spa_sync_on; /* sync threads are running */
spa_load_state_t spa_load_state; /* current load operation */
- taskq_t *spa_zio_issue_taskq[ZIO_TYPES];
- taskq_t *spa_zio_intr_taskq[ZIO_TYPES];
+ taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
metaslab_class_t *spa_normal_class; /* normal data class */
+ metaslab_class_t *spa_log_class; /* intent log data class */
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
uint64_t spa_freeze_txg; /* freeze pool at this txg */
@@ -96,12 +117,10 @@ struct spa {
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
uint64_t spa_load_guid; /* initial guid for spa_load */
- list_t spa_dirty_list; /* vdevs with dirty labels */
- uint64_t spa_spares_object; /* MOS object for spare list */
- nvlist_t *spa_sparelist; /* cached spare config */
- vdev_t **spa_spares; /* available hot spares */
- int spa_nspares; /* number of hot spares */
- boolean_t spa_sync_spares; /* sync the spares list */
+ list_t spa_config_dirty_list; /* vdevs with dirty config */
+ list_t spa_state_dirty_list; /* vdevs with dirty state */
+ spa_aux_vdev_t spa_spares; /* hot spares */
+ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
@@ -110,28 +129,24 @@ struct spa {
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
- kthread_t *spa_scrub_thread; /* scrub/resilver thread */
- traverse_handle_t *spa_scrub_th; /* scrub traverse handle */
- uint64_t spa_scrub_restart_txg; /* need to restart */
- uint64_t spa_scrub_mintxg; /* min txg we'll scrub */
- uint64_t spa_scrub_maxtxg; /* max txg we'll scrub */
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
uint64_t spa_scrub_errors; /* scrub I/O error count */
- int spa_scrub_suspended; /* tell scrubber to suspend */
- kcondvar_t spa_scrub_cv; /* scrub thread state change */
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
- uint8_t spa_scrub_stop; /* tell scrubber to stop */
uint8_t spa_scrub_active; /* active or suspended? */
uint8_t spa_scrub_type; /* type of scrub we're doing */
uint8_t spa_scrub_finished; /* indicator to rotate logs */
+ uint8_t spa_scrub_started; /* started since last boot */
+ uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */
kcondvar_t spa_async_cv; /* wait for thread_exit() */
uint16_t spa_async_tasks; /* async task mask */
+ kmutex_t spa_async_root_lock; /* protects async root count */
+ uint64_t spa_async_root_count; /* number of async root zios */
+ kcondvar_t spa_async_root_cv; /* notify when count == 0 */
char *spa_root; /* alternate root directory */
- kmutex_t spa_uberblock_lock; /* vdev_uberblock_load_done() */
uint64_t spa_ena; /* spa-wide ereport ENA */
boolean_t spa_last_open_failed; /* true if last open faled */
kmutex_t spa_errlog_lock; /* error log lock */
@@ -144,22 +159,37 @@ struct spa {
uint64_t spa_history; /* history object */
kmutex_t spa_history_lock; /* history lock */
vdev_t *spa_pending_vdev; /* pending vdev additions */
- nvlist_t **spa_pending_spares; /* pending spare additions */
- uint_t spa_pending_nspares; /* # pending spares */
kmutex_t spa_props_lock; /* property lock */
uint64_t spa_pool_props_object; /* object for properties */
uint64_t spa_bootfs; /* default boot filesystem */
+ uint64_t spa_failmode; /* failure mode for the pool */
+ uint64_t spa_delegation; /* delegation on/off */
+ list_t spa_config_list; /* previous cache file(s) */
+ zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
+ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
+ kcondvar_t spa_suspend_cv; /* notification of resume */
+ uint8_t spa_suspended; /* pool is suspended */
+ boolean_t spa_import_faulted; /* allow faulted vdevs */
+ boolean_t spa_is_root; /* pool is root */
+ int spa_minref; /* num refs when first opened */
+ spa_log_state_t spa_log_state; /* log state */
/*
- * spa_refcnt must be the last element because it changes size based on
- * compilation options. In order for the MDB module to function
- * correctly, the other fields must remain in the same location.
+ * spa_refcnt & spa_config_lock must be the last elements
+ * because refcount_t changes size based on compilation options.
+ * In order for the MDB module to function correctly, the other
+ * fields must remain in the same location.
*/
- spa_config_lock_t spa_config_lock; /* configuration changes */
+ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
refcount_t spa_refcount; /* number of opens */
};
-extern const char *spa_config_dir;
-extern kmutex_t spa_namespace_lock;
+extern const char *spa_config_path;
+
+#define BOOTFS_COMPRESS_VALID(compress) \
+ ((compress) == ZIO_COMPRESS_LZJB || \
+ ((compress) == ZIO_COMPRESS_ON && \
+ ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
+ (compress) == ZIO_COMPRESS_OFF)
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
index dae129c2e5a4..23bdff211b4a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -76,6 +75,14 @@ extern void txg_suspend(struct dsl_pool *dp);
extern void txg_resume(struct dsl_pool *dp);
/*
+ * Delay the caller by the specified number of ticks or until
+ * the txg closes (whichever comes first). This is intended
+ * to be used to throttle writers when the system nears its
+ * capacity.
+ */
+extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
+
+/*
* Wait until the given transaction group has finished syncing.
* Try to make this happen as soon as possible (eg. kick off any
* necessary syncs immediately). If txg==0, wait for the currently open
@@ -95,7 +102,10 @@ extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
* Returns TRUE if we are "backed up" waiting for the syncing
* transaction to complete; otherwise returns FALSE.
*/
-extern int txg_stalled(struct dsl_pool *dp);
+extern boolean_t txg_stalled(struct dsl_pool *dp);
+
+/* returns TRUE if someone is waiting for the next txg to sync */
+extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
/*
* Per-txg object lists.
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
index 45a138afaac3..a58be84be5af 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -59,7 +58,7 @@ typedef struct tx_state {
kcondvar_t tx_sync_done_cv;
kcondvar_t tx_quiesce_more_cv;
kcondvar_t tx_quiesce_done_cv;
- kcondvar_t tx_timeout_exit_cv;
+ kcondvar_t tx_timeout_cv;
kcondvar_t tx_exit_cv; /* wait for all threads to exit */
uint8_t tx_threads; /* number of threads */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
index ab0f2dcf8c1b..55a0dd5aec0d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -49,7 +49,7 @@ extern "C" {
struct uberblock {
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
- uint64_t ub_version; /* ZFS_VERSION */
+ uint64_t ub_version; /* SPA_VERSION */
uint64_t ub_txg; /* txg of last sync */
uint64_t ub_guid_sum; /* sum of all vdev guids */
uint64_t ub_timestamp; /* UTC time of last sync */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h
index c8c177e3ca6c..2ef3093edf1c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,8 +38,12 @@ extern "C" {
#define UNIQUE_BITS 56
void unique_init(void);
+void unique_fini(void);
-/* Return a new unique value. */
+/*
+ * Return a new unique value (which will not be uniquified against until
+ * it is unique_insert()-ed.
+ */
uint64_t unique_create(void);
/* Return a unique value, which equals the one passed in if possible. */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
index 31208116256d..013389501e51 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_H
#define _SYS_VDEV_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
@@ -40,35 +38,31 @@ extern "C" {
extern boolean_t zfs_nocacheflush;
-/*
- * Fault injection modes.
- */
-#define VDEV_FAULT_NONE 0
-#define VDEV_FAULT_RANDOM 1
-#define VDEV_FAULT_COUNT 2
-
extern int vdev_open(vdev_t *);
extern int vdev_validate(vdev_t *);
extern void vdev_close(vdev_t *);
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
extern void vdev_init(vdev_t *, uint64_t txg);
extern void vdev_reopen(vdev_t *);
-extern int vdev_validate_spare(vdev_t *);
+extern int vdev_validate_aux(vdev_t *vd);
+extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
+extern boolean_t vdev_is_bootable(vdev_t *vd);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
int scrub_done);
-
-extern const char *vdev_description(vdev_t *vd);
+extern boolean_t vdev_resilver_needed(vdev_t *vd,
+ uint64_t *minp, uint64_t *maxp);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
-extern void vdev_stat_update(zio_t *zio);
+extern void vdev_clear_stats(vdev_t *vd);
+extern void vdev_stat_update(zio_t *zio, uint64_t psize);
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
boolean_t complete);
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
@@ -77,24 +71,27 @@ extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
vdev_aux_t aux);
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
- int64_t alloc_delta);
+ int64_t alloc_delta, boolean_t update_root);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
-extern void vdev_io_start(zio_t *zio);
-extern void vdev_io_done(zio_t *zio);
-
-extern int vdev_online(spa_t *spa, uint64_t guid);
-extern int vdev_offline(spa_t *spa, uint64_t guid, int istmp);
+extern int vdev_fault(spa_t *spa, uint64_t guid);
+extern int vdev_degrade(spa_t *spa, uint64_t guid);
+extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
+ vdev_state_t *);
+extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
extern void vdev_clear(spa_t *spa, vdev_t *vd);
-extern int vdev_error_inject(vdev_t *vd, zio_t *zio);
-extern int vdev_is_dead(vdev_t *vd);
+extern boolean_t vdev_is_dead(vdev_t *vd);
+extern boolean_t vdev_readable(vdev_t *vd);
+extern boolean_t vdev_writeable(vdev_t *vd);
+extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern void vdev_cache_init(vdev_t *vd);
extern void vdev_cache_fini(vdev_t *vd);
extern int vdev_cache_read(zio_t *zio);
extern void vdev_cache_write(zio_t *zio);
+extern void vdev_cache_purge(vdev_t *vd);
extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
@@ -103,16 +100,20 @@ extern void vdev_queue_io_done(zio_t *zio);
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
+extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
+
+extern void vdev_state_dirty(vdev_t *vd);
+extern void vdev_state_clean(vdev_t *vd);
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
- boolean_t getstats, boolean_t isspare);
+ boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
/*
* Label routines
*/
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
+extern int vdev_label_number(uint64_t psise, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
@@ -120,7 +121,8 @@ typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
VDEV_LABEL_REPLACE, /* replace an existing device */
VDEV_LABEL_SPARE, /* add a new hot spare */
- VDEV_LABEL_REMOVE /* remove an existing device */
+ VDEV_LABEL_REMOVE, /* remove an existing device */
+ VDEV_LABEL_L2CACHE /* add an L2ARC cache device */
} vdev_labeltype_t;
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h
index 95536a77db9a..b748571ea0c3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,6 +30,8 @@
#include <sys/vdev.h>
#ifdef _KERNEL
+#include <sys/buf.h>
+#include <sys/ddi.h>
#include <sys/sunldi.h>
#include <sys/sunddi.h>
#endif
@@ -45,6 +46,9 @@ typedef struct vdev_disk {
ldi_handle_t vd_lh;
} vdev_disk_t;
+#ifdef _KERNEL
+extern int vdev_disk_physio(ldi_handle_t, caddr_t, size_t, uint64_t, int);
+#endif
#ifdef __cplusplus
}
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
index aba756713f9c..7e24edea7f38 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_IMPL_H
#define _SYS_VDEV_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/avl.h>
#include <sys/dmu.h>
#include <sys/metaslab.h>
@@ -61,7 +59,7 @@ typedef struct vdev_cache_entry vdev_cache_entry_t;
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
typedef void vdev_close_func_t(vdev_t *vd);
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
-typedef void vdev_io_start_func_t(zio_t *zio);
+typedef int vdev_io_start_func_t(zio_t *zio);
typedef void vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
@@ -140,9 +138,12 @@ struct vdev {
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
- uint8_t vdev_reopen_wanted; /* async reopen wanted? */
- list_node_t vdev_dirty_node; /* config dirty list */
+ boolean_t vdev_remove_wanted; /* async remove wanted? */
+ boolean_t vdev_probe_wanted; /* async probe wanted? */
+ list_node_t vdev_config_dirty_node; /* config dirty list */
+ list_node_t vdev_state_dirty_node; /* state dirty list */
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
+ uint64_t vdev_islog; /* is an intent log device */
/*
* Leaf vdev state.
@@ -151,22 +152,30 @@ struct vdev {
space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
uint64_t vdev_wholedisk; /* true if this is a whole disk */
- uint64_t vdev_offline; /* device taken offline? */
+ uint64_t vdev_offline; /* persistent offline state */
+ uint64_t vdev_faulted; /* persistent faulted state */
+ uint64_t vdev_degraded; /* persistent degraded state */
+ uint64_t vdev_removed; /* persistent removed state */
uint64_t vdev_nparity; /* number of parity devices for raidz */
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
- uint64_t vdev_fault_arg; /* fault injection paramater */
- int vdev_fault_mask; /* zio types to fault */
- uint8_t vdev_fault_mode; /* fault injection mode */
- uint8_t vdev_cache_active; /* vdev_cache and vdev_queue */
+ char *vdev_physpath; /* vdev device path (if any) */
+ uint64_t vdev_not_present; /* not present during import */
+ uint64_t vdev_unspare; /* unspare when resilvering done */
+ hrtime_t vdev_last_try; /* last reopen time */
+ boolean_t vdev_nowritecache; /* true if flushwritecache failed */
+ boolean_t vdev_checkremove; /* temporary online test */
+ boolean_t vdev_forcefault; /* force online fault */
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
uint8_t vdev_detached; /* device detached? */
- uint64_t vdev_isspare; /* was a hot spare */
+ uint8_t vdev_cant_read; /* vdev is failing all reads */
+ uint8_t vdev_cant_write; /* vdev is failing all writes */
+ uint64_t vdev_isspare; /* was a hot spare */
+ uint64_t vdev_isl2cache; /* was a l2cache device */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
- uint64_t vdev_not_present; /* not present during import */
- hrtime_t vdev_last_try; /* last reopen time */
- boolean_t vdev_nowritecache; /* true if flushwritecache failed */
+ spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
+ zio_t *vdev_probe_zio; /* root of current probe */
/*
* For DTrace to work in userland (libzpool) context, these fields must
@@ -177,6 +186,7 @@ struct vdev {
*/
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
kmutex_t vdev_stat_lock; /* vdev_stat */
+ kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
};
#define VDEV_SKIP_SIZE (8 << 10)
@@ -239,6 +249,7 @@ typedef struct vdev_label {
#define VDEV_ALLOC_LOAD 0
#define VDEV_ALLOC_ADD 1
#define VDEV_ALLOC_SPARE 2
+#define VDEV_ALLOC_L2CACHE 3
/*
* Allocate or free a vdev
@@ -275,8 +286,8 @@ extern vdev_ops_t vdev_raidz_ops;
extern vdev_ops_t vdev_geom_ops;
#else
extern vdev_ops_t vdev_disk_ops;
-extern vdev_ops_t vdev_file_ops;
#endif
+extern vdev_ops_t vdev_file_ops;
extern vdev_ops_t vdev_missing_ops;
extern vdev_ops_t vdev_spare_ops;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
index f89d9385ea38..f88cc068bd57 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,7 +31,7 @@
/*
* ZAP - ZFS Attribute Processor
*
- * The ZAP is a module which sits on top of the DMU (Data Managemnt
+ * The ZAP is a module which sits on top of the DMU (Data Management
* Unit) and implements a higher-level storage primitive using DMU
* objects. Its primary consumer is the ZPL (ZFS Posix Layer).
*
@@ -91,10 +91,38 @@ extern "C" {
#define ZAP_MAXVALUELEN 1024
/*
+ * The matchtype specifies which entry will be accessed.
+ * MT_EXACT: only find an exact match (non-normalized)
+ * MT_FIRST: find the "first" normalized (case and Unicode
+ * form) match; the designated "first" match will not change as long
+ * as the set of entries with this normalization doesn't change
+ * MT_BEST: if there is an exact match, find that, otherwise find the
+ * first normalized match
+ */
+typedef enum matchtype
+{
+ MT_EXACT,
+ MT_BEST,
+ MT_FIRST
+} matchtype_t;
+
+/*
* Create a new zapobj with no attributes and return its object number.
+ * MT_EXACT will cause the zap object to only support MT_EXACT lookups,
+ * otherwise any matchtype can be used for lookups.
+ *
+ * normflags specifies what normalization will be done. values are:
+ * 0: no normalization (legacy on-disk format, supports MT_EXACT matching
+ * only)
+ * U8_TEXTPREP_TOLOWER: case normalization will be performed.
+ * MT_FIRST/MT_BEST matching will find entries that match without
+ * regard to case (eg. looking for "foo" can find an entry "Foo").
+ * Eventually, other flags will permit unicode normalization as well.
*/
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
@@ -102,6 +130,9 @@ uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
*/
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+int zap_create_claim_norm(objset_t *ds, uint64_t obj,
+ int normflags, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* The zapobj passed in must be a valid ZAP object for all of the
@@ -140,9 +171,20 @@ int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
* If the attribute is longer than the buffer, as many integers as will
* fit will be transferred to 'buf'. If the entire attribute was not
* transferred, the call will return EOVERFLOW.
+ *
+ * If rn_len is nonzero, realname will be set to the name of the found
+ * entry (which may be different from the requested name if matchtype is
+ * not MT_EXACT).
+ *
+ * If normalization_conflictp is not NULL, it will be set if there is
+ * another name with the same case/unicode normalized form.
*/
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf);
+int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf,
+ matchtype_t mt, char *realname, int rn_len,
+ boolean_t *normalization_conflictp);
/*
* Create an attribute with the given name and value.
@@ -182,6 +224,8 @@ int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
* return ENOENT.
*/
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
+int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
+ matchtype_t mt, dmu_tx_t *tx);
/*
* Returns (in *count) the number of attributes in the specified zap
@@ -191,11 +235,28 @@ int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
/*
- * Returns (in name) the name of the entry whose value
+ * Returns (in name) the name of the entry whose (value & mask)
* (za_first_integer) is value, or ENOENT if not found. The string
- * pointed to by name must be at least 256 bytes long.
+ * pointed to by name must be at least 256 bytes long. If mask==0, the
+ * match must be exact (ie, same as mask=-1ULL).
*/
-int zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, char *name);
+int zap_value_search(objset_t *os, uint64_t zapobj,
+ uint64_t value, uint64_t mask, char *name);
+
+/*
+ * Transfer all the entries from fromobj into intoobj. Only works on
+ * int_size=8 num_integers=1 values. Fails if there are any duplicated
+ * entries.
+ */
+int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
+
+/*
+ * Manipulate entries where the name + value are the "same" (the name is
+ * a stringified version of the value).
+ */
+int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
+int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
+int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
struct zap;
struct zap_leaf;
@@ -211,6 +272,11 @@ typedef struct zap_cursor {
typedef struct {
int za_integer_length;
+ /*
+ * za_normalization_conflict will be set if there are additional
+ * entries with this normalized form (eg, "foo" and "Foo").
+ */
+ boolean_t za_normalization_conflict;
uint64_t za_num_integers;
uint64_t za_first_integer; /* no sign extension for <8byte ints */
char za_name[MAXNAMELEN];
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
index 4e43f4ae49a1..0dc02ab6b0ac 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -59,7 +59,8 @@ typedef struct mzap_ent_phys {
typedef struct mzap_phys {
uint64_t mz_block_type; /* ZBT_MICRO */
uint64_t mz_salt;
- uint64_t mz_pad[6];
+ uint64_t mz_normflags;
+ uint64_t mz_pad[5];
mzap_ent_phys_t mz_chunk[1];
/* actually variable size depending on block size */
} mzap_phys_t;
@@ -127,6 +128,7 @@ typedef struct zap_phys {
uint64_t zap_num_leafs; /* number of leafs */
uint64_t zap_num_entries; /* number of entries */
uint64_t zap_salt; /* salt to stir into hash function */
+ uint64_t zap_normflags; /* flags for u8_textprep_str() */
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
@@ -142,7 +144,8 @@ typedef struct zap {
uint64_t zap_object;
struct dmu_buf *zap_dbuf;
krwlock_t zap_rwlock;
- int zap_ismicro;
+ boolean_t zap_ismicro;
+ int zap_normflags;
uint64_t zap_salt;
union {
struct {
@@ -165,34 +168,45 @@ typedef struct zap {
} zap_u;
} zap_t;
+typedef struct zap_name {
+ zap_t *zn_zap;
+ const char *zn_name_orij;
+ uint64_t zn_hash;
+ matchtype_t zn_matchtype;
+ const char *zn_name_norm;
+ char zn_normbuf[ZAP_MAXNAMELEN];
+} zap_name_t;
+
#define zap_f zap_u.zap_fat
#define zap_m zap_u.zap_micro
-uint64_t zap_hash(zap_t *zap, const char *name);
+boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
- krw_t lti, int fatreader, zap_t **zapp);
+ krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap);
void zap_evict(dmu_buf_t *db, void *vmzap);
+zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
+void zap_name_free(zap_name_t *zn);
#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
void fzap_byteswap(void *buf, size_t size);
int fzap_count(zap_t *zap, uint64_t *count);
-int fzap_lookup(zap_t *zap, const char *name,
- uint64_t integer_size, uint64_t num_integers, void *buf);
-int fzap_add(zap_t *zap, const char *name,
- uint64_t integer_size, uint64_t num_integers,
+int fzap_lookup(zap_name_t *zn,
+ uint64_t integer_size, uint64_t num_integers, void *buf,
+ char *realname, int rn_len, boolean_t *normalization_conflictp);
+int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
-int fzap_update(zap_t *zap, const char *name,
+int fzap_update(zap_name_t *zn,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
-int fzap_length(zap_t *zap, const char *name,
+int fzap_length(zap_name_t *zn,
uint64_t *integer_size, uint64_t *num_integers);
-int fzap_remove(zap_t *zap, const char *name, dmu_tx_t *tx);
+int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
void zap_put_leaf(struct zap_leaf *l);
-int fzap_add_cd(zap_t *zap, const char *name,
+int fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
const void *val, uint32_t cd, dmu_tx_t *tx);
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
index 147fb7212454..14144e059e54 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -92,6 +92,8 @@ typedef enum zap_chunk_type {
ZAP_CHUNK_TYPE_MAX = 250
} zap_chunk_type_t;
+#define ZLF_ENTRIES_CDSORTED (1<<0)
+
/*
* TAKE NOTE:
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
@@ -109,7 +111,8 @@ typedef struct zap_leaf_phys {
/* above is accessable to zap, below is zap_leaf private */
uint16_t lh_freelist; /* chunk head of free list */
- uint8_t lh_pad2[12];
+ uint8_t lh_flags; /* ZLF_* flags */
+ uint8_t lh_pad2[11];
} l_hdr; /* 2 24-byte chunks */
/*
@@ -148,7 +151,7 @@ typedef union zap_leaf_chunk {
} zap_leaf_chunk_t;
typedef struct zap_leaf {
- krwlock_t l_rwlock; /* only used on head of chain */
+ krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */
dmu_buf_t *l_dbuf;
@@ -174,7 +177,7 @@ typedef struct zap_entry_handle {
* value must equal zap_hash(name).
*/
extern int zap_leaf_lookup(zap_leaf_t *l,
- const char *name, uint64_t h, zap_entry_handle_t *zeh);
+ zap_name_t *zn, zap_entry_handle_t *zeh);
/*
* Return a handle to the entry with this hash+cd, or the entry with the
@@ -219,12 +222,19 @@ extern int zap_entry_create(zap_leaf_t *l,
zap_entry_handle_t *zeh);
/*
+ * Return true if there are additional entries with the same normalized
+ * form.
+ */
+extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
+ zap_name_t *zn, const char *name, zap_t *zap);
+
+/*
* Other stuff.
*/
-extern void zap_leaf_init(zap_leaf_t *l);
+extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
-extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl);
+extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
#ifdef __cplusplus
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
index 3250b760fb07..fe953184db44 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -33,6 +33,7 @@
#endif
#include <sys/acl.h>
#include <sys/dmu.h>
+#include <sys/zfs_fuid.h>
#ifdef __cplusplus
extern "C" {
@@ -40,33 +41,131 @@ extern "C" {
struct znode_phys;
-#define ACCESS_UNDETERMINED -1
-
#define ACE_SLOT_CNT 6
+#define ZFS_ACL_VERSION_INITIAL 0ULL
+#define ZFS_ACL_VERSION_FUID 1ULL
+#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID
+
+/*
+ * ZFS ACLs are store in various forms.
+ * Files created with ACL version ZFS_ACL_VERSION_INITIAL
+ * will all be created with fixed length ACEs of type
+ * zfs_oldace_t.
+ *
+ * Files with ACL version ZFS_ACL_VERSION_FUID will be created
+ * with various sized ACEs. The abstraction entries will utilize
+ * zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t
+ * and some specialized CIFS ACEs will use zfs_object_ace_t.
+ */
+
+/*
+ * All ACEs have a common hdr. For
+ * owner@, group@, and everyone@ this is all
+ * thats needed.
+ */
+typedef struct zfs_ace_hdr {
+ uint16_t z_type;
+ uint16_t z_flags;
+ uint32_t z_access_mask;
+} zfs_ace_hdr_t;
+
+typedef zfs_ace_hdr_t zfs_ace_abstract_t;
+
+/*
+ * Standard ACE
+ */
+typedef struct zfs_ace {
+ zfs_ace_hdr_t z_hdr;
+ uint64_t z_fuid;
+} zfs_ace_t;
+
+/*
+ * The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE
+ * and will only be set/retrieved in a CIFS context.
+ */
-typedef struct zfs_znode_acl {
+typedef struct zfs_object_ace {
+ zfs_ace_t z_ace;
+ uint8_t z_object_type[16]; /* object type */
+ uint8_t z_inherit_type[16]; /* inherited object type */
+} zfs_object_ace_t;
+
+typedef struct zfs_oldace {
+ uint32_t z_fuid; /* "who" */
+ uint32_t z_access_mask; /* access mask */
+ uint16_t z_flags; /* flags, i.e inheritance */
+ uint16_t z_type; /* type of entry allow/deny */
+} zfs_oldace_t;
+
+typedef struct zfs_acl_phys_v0 {
+ uint64_t z_acl_extern_obj; /* ext acl pieces */
+ uint32_t z_acl_count; /* Number of ACEs */
+ uint16_t z_acl_version; /* acl version */
+ uint16_t z_acl_pad; /* pad */
+ zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
+} zfs_acl_phys_v0_t;
+
+#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
+
+typedef struct zfs_acl_phys {
uint64_t z_acl_extern_obj; /* ext acl pieces */
- uint32_t z_acl_count; /* Number of ACEs */
+ uint32_t z_acl_size; /* Number of bytes in ACL */
uint16_t z_acl_version; /* acl version */
- uint16_t z_acl_pad; /* pad */
- ace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
-} zfs_znode_acl_t;
-
-#define ACL_DATA_ALLOCED 0x1
+ uint16_t z_acl_count; /* ace count */
+ uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
+} zfs_acl_phys_t;
+
+
+
+typedef struct acl_ops {
+ uint32_t (*ace_mask_get) (void *acep); /* get access mask */
+ void (*ace_mask_set) (void *acep,
+ uint32_t mask); /* set access mask */
+ uint16_t (*ace_flags_get) (void *acep); /* get flags */
+ void (*ace_flags_set) (void *acep,
+ uint16_t flags); /* set flags */
+ uint16_t (*ace_type_get)(void *acep); /* get type */
+ void (*ace_type_set)(void *acep,
+ uint16_t type); /* set type */
+ uint64_t (*ace_who_get)(void *acep); /* get who/fuid */
+ void (*ace_who_set)(void *acep,
+ uint64_t who); /* set who/fuid */
+ size_t (*ace_size)(void *acep); /* how big is this ace */
+ size_t (*ace_abstract_size)(void); /* sizeof abstract entry */
+ int (*ace_mask_off)(void); /* off of access mask in ace */
+ int (*ace_data)(void *acep, void **datap);
+ /* ptr to data if any */
+} acl_ops_t;
/*
- * Max ACL size is prepended deny for all entries + the
- * canonical six tacked on * the end.
+ * A zfs_acl_t structure is composed of a list of zfs_acl_node_t's.
+ * Each node will have one or more ACEs associated with it. You will
+ * only have multiple nodes during a chmod operation. Normally only
+ * one node is required.
*/
-#define MAX_ACL_SIZE (MAX_ACL_ENTRIES * 2 + 6)
+typedef struct zfs_acl_node {
+ list_node_t z_next; /* Next chunk of ACEs */
+ void *z_acldata; /* pointer into actual ACE(s) */
+ void *z_allocdata; /* pointer to kmem allocated memory */
+ size_t z_allocsize; /* Size of blob in bytes */
+ size_t z_size; /* length of ACL data */
+ int z_ace_count; /* number of ACEs in this acl node */
+ int z_ace_idx; /* ace iterator positioned on */
+} zfs_acl_node_t;
typedef struct zfs_acl {
- int z_slots; /* number of allocated slots for ACEs */
- int z_acl_count;
- uint_t z_state;
- ace_t *z_acl;
+ int z_acl_count; /* Number of ACEs */
+ size_t z_acl_bytes; /* Number of bytes in ACL */
+ uint_t z_version; /* version of ACL */
+ void *z_next_ace; /* pointer to next ACE */
+ int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
+ zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
+ list_t z_acl; /* chunks of ACE data */
+ acl_ops_t z_ops; /* ACL operations */
+ boolean_t z_has_fuids; /* FUIDs present in ACL? */
} zfs_acl_t;
+#define ACL_DATA_ALLOCED 0x1
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
/*
@@ -80,31 +179,34 @@ typedef struct zfs_acl {
#define ZFS_ACL_NOALLOW 1
#define ZFS_ACL_GROUPMASK 2
#define ZFS_ACL_PASSTHROUGH 3
-#define ZFS_ACL_SECURE 4
+#define ZFS_ACL_RESTRICTED 4
struct znode;
+struct zfsvfs;
+struct zfs_fuid_info;
#ifdef _KERNEL
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
- dmu_tx_t *, cred_t *);
-#ifdef TODO
-int zfs_getacl(struct znode *, vsecattr_t *, cred_t *);
-#endif
-int zfs_mode_update(struct znode *, uint64_t, dmu_tx_t *);
+ dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
#ifdef TODO
-int zfs_setacl(struct znode *, vsecattr_t *, cred_t *);
+int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
+int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
#endif
void zfs_acl_rele(void *);
-void zfs_ace_byteswap(ace_t *, int);
-extern int zfs_zaccess(struct znode *, int, cred_t *);
-extern int zfs_zaccess_rwx(struct znode *, mode_t, cred_t *);
+void zfs_oldace_byteswap(ace_t *, int);
+void zfs_ace_byteswap(void *, size_t, boolean_t);
+extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
+extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
+extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
extern int zfs_acl_access(struct znode *, int, cred_t *);
-int zfs_acl_chmod_setattr(struct znode *, uint64_t, dmu_tx_t *);
+int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
struct znode *, struct znode *, cred_t *cr);
-int zfs_zaccess_v4_perm(struct znode *, int, cred_t *);
void zfs_acl_free(zfs_acl_t *);
+int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
+int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
+ struct zfs_fuid_info **, dmu_tx_t *);
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
index 4deeb3c9bf75..76fdc0dce7a5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -45,6 +44,7 @@ extern "C" {
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
+#include <sys/taskqueue.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/mutex.h>
@@ -73,11 +73,19 @@ extern "C" {
#include <sys/ktr.h>
#include <sys/stack.h>
#include <sys/lockf.h>
+#include <sys/pathname.h>
#include <sys/policy.h>
+#include <sys/refstr.h>
#include <sys/zone.h>
#include <sys/eventhandler.h>
+#include <sys/extattr.h>
#include <sys/misc.h>
+#include <sys/sig.h>
+#include <sys/osd.h>
#include <sys/zfs_debug.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/u8_textprep.h>
+#include <sys/fm/util.h>
#include <machine/stdarg.h>
@@ -99,6 +107,14 @@ extern "C" {
#define CPU_SEQID (curcpu)
+#define tsd_create(keyp, destructor) do { \
+ *(keyp) = osd_thread_register((destructor)); \
+ KASSERT(*(keyp) > 0, ("cannot register OSD")); \
+} while (0)
+#define tsd_destroy(keyp) osd_thread_deregister(*(keyp))
+#define tsd_get(key) osd_thread_get(curthread, (key))
+#define tsd_set(key, value) osd_thread_set(curthread, (key), (value))
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
index a676533ac4a2..905e8dd2c0e3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -57,7 +56,8 @@ int zfsctl_destroy_snapshot(const char *snapname, int force);
int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
- int flags, vnode_t *rdir, cred_t *cr);
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp);
int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
index f60d614953f3..ebb66e8ae4e9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
@@ -28,6 +28,7 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/pathname.h>
#include <sys/dmu.h>
#include <sys/zfs_znode.h>
@@ -41,6 +42,8 @@ extern "C" {
#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
#define ZXATTR 0x0008 /* we want the xattr dir */
#define ZRENAMING 0x0010 /* znode is being renamed */
+#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
+#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
@@ -48,15 +51,17 @@ extern "C" {
#define IS_REPLAY 0x04 /* we are replaying intent log */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
- int);
+ int, int *, pathname_t *);
extern void zfs_dirent_unlock(zfs_dirlock_t *);
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
boolean_t *);
-extern int zfs_dirlook(znode_t *, char *, vnode_t **);
-extern void zfs_mknode(znode_t *, vattr_t *, uint64_t *,
- dmu_tx_t *, cred_t *, uint_t, znode_t **, int);
+extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
+ pathname_t *);
+extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
+ uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
extern void zfs_rmnode(znode_t *);
+extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h
new file mode 100644
index 000000000000..8d73b41938df
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_FS_ZFS_FUID_H
+#define _SYS_FS_ZFS_FUID_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#ifdef _KERNEL
+#include <sys/kidmap.h>
+#include <sys/dmu.h>
+#include <sys/zfs_vfsops.h>
+#endif
+#include <sys/avl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ ZFS_OWNER,
+ ZFS_GROUP,
+ ZFS_ACE_USER,
+ ZFS_ACE_GROUP
+} zfs_fuid_type_t;
+
+/*
+ * Estimate space needed for one more fuid table entry.
+ * for now assume its current size + 1K
+ */
+#define FUID_SIZE_ESTIMATE(z) (z->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
+
+#define FUID_INDEX(x) (x >> 32)
+#define FUID_RID(x) (x & 0xffffffff)
+#define FUID_ENCODE(idx, rid) ((idx << 32) | rid)
+/*
+ * FUIDs cause problems for the intent log
+ * we need to replay the creation of the FUID,
+ * but we can't count on the idmapper to be around
+ * and during replay the FUID index may be different than
+ * before. Also, if an ACL has 100 ACEs and 12 different
+ * domains we don't want to log 100 domain strings, but rather
+ * just the unique 12.
+ */
+
+/*
+ * The FUIDs in the log will index into
+ * domain string table and the bottom half will be the rid.
+ * Used for mapping ephemeral uid/gid during ACL setting to FUIDs
+ */
+typedef struct zfs_fuid {
+ list_node_t z_next;
+ uint64_t z_id; /* uid/gid being converted to fuid */
+ uint64_t z_domidx; /* index in AVL domain table */
+ uint64_t z_logfuid; /* index for domain in log */
+} zfs_fuid_t;
+
+/* list of unique domains */
+typedef struct zfs_fuid_domain {
+ list_node_t z_next;
+ uint64_t z_domidx; /* AVL tree idx */
+ const char *z_domain; /* domain string */
+} zfs_fuid_domain_t;
+
+/*
+ * FUID information necessary for logging create, setattr, and setacl.
+ */
+typedef struct zfs_fuid_info {
+ list_t z_fuids;
+ list_t z_domains;
+ uint64_t z_fuid_owner;
+ uint64_t z_fuid_group;
+ char **z_domain_table; /* Used during replay */
+ uint32_t z_fuid_cnt; /* How many fuids in z_fuids */
+ uint32_t z_domain_cnt; /* How many domains */
+ size_t z_domain_str_sz; /* len of domain strings z_domain list */
+} zfs_fuid_info_t;
+
+#ifdef _KERNEL
+struct znode;
+extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
+extern void zfs_fuid_destroy(zfsvfs_t *);
+extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
+ dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
+extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
+ dmu_tx_t *, zfs_fuid_info_t **);
+extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
+ uid_t *gid);
+extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
+extern void zfs_fuid_info_free();
+extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
+#endif
+
+char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
+uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
+void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_ZFS_FUID_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
index 61a0a9ebdc2e..05a21c846ee8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,6 +31,11 @@
#include <sys/cred.h>
#include <sys/dmu.h>
#include <sys/zio.h>
+#include <sys/dsl_deleg.h>
+
+#ifdef _KERNEL
+#include <sys/nvpair.h>
+#endif /* _KERNEL */
#ifdef __cplusplus
extern "C" {
@@ -42,9 +47,13 @@ extern "C" {
#define ZFS_SNAPDIR_HIDDEN 0
#define ZFS_SNAPDIR_VISIBLE 1
-#define DMU_BACKUP_VERSION (1ULL)
+#define DMU_BACKUP_STREAM_VERSION (1ULL)
+#define DMU_BACKUP_HEADER_VERSION (2ULL)
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
+#define DRR_FLAG_CLONE (1<<0)
+#define DRR_FLAG_CI_DATA (1<<1)
+
/*
* zfs ioctl command structure
*/
@@ -53,14 +62,14 @@ typedef struct dmu_replay_record {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END,
} drr_type;
- uint32_t drr_pad;
+ uint32_t drr_payloadlen;
union {
struct drr_begin {
uint64_t drr_magic;
uint64_t drr_version;
uint64_t drr_creation_time;
dmu_objset_type_t drr_type;
- uint32_t drr_pad;
+ uint32_t drr_flags;
uint64_t drr_toguid;
uint64_t drr_fromguid;
char drr_toname[MAXNAMELEN];
@@ -109,48 +118,71 @@ typedef struct zinject_record {
uint32_t zi_error;
uint64_t zi_type;
uint32_t zi_freq;
+ uint32_t zi_pad; /* pad out to 64 bit alignment */
} zinject_record_t;
#define ZINJECT_NULL 0x1
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
+typedef struct zfs_share {
+ uint64_t z_exportdata;
+ uint64_t z_sharedata;
+ uint64_t z_sharetype; /* 0 = share, 1 = unshare */
+ uint64_t z_sharemax; /* max length of share string */
+} zfs_share_t;
+
+/*
+ * ZFS file systems may behave the usual, POSIX-compliant way, where
+ * name lookups are case-sensitive. They may also be set up so that
+ * all the name lookups are case-insensitive, or so that only some
+ * lookups, the ones that set an FIGNORECASE flag, are case-insensitive.
+ */
+typedef enum zfs_case {
+ ZFS_CASE_SENSITIVE,
+ ZFS_CASE_INSENSITIVE,
+ ZFS_CASE_MIXED
+} zfs_case_t;
+
typedef struct zfs_cmd {
char zc_name[MAXPATHLEN];
- char zc_value[MAXPATHLEN * 2];
+ char zc_value[MAXPATHLEN];
+ char zc_string[MAXNAMELEN];
uint64_t zc_guid;
- uint64_t zc_nvlist_src; /* really (char *) */
+ uint64_t zc_nvlist_conf; /* really (char *) */
+ uint64_t zc_nvlist_conf_size;
+ uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_nvlist_src_size;
- uint64_t zc_nvlist_dst; /* really (char *) */
+ uint64_t zc_nvlist_dst; /* really (char *) */
uint64_t zc_nvlist_dst_size;
uint64_t zc_cookie;
- uint64_t zc_cred;
- uint64_t zc_dev;
uint64_t zc_objset_type;
- uint64_t zc_history; /* really (char *) */
- uint64_t zc_history_len;
+ uint64_t zc_perm_action;
+ uint64_t zc_history; /* really (char *) */
+ uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
+ zfs_share_t zc_share;
uint64_t zc_jailid;
dmu_objset_stats_t zc_objset_stats;
struct drr_begin zc_begin_record;
zinject_record_t zc_inject_record;
} zfs_cmd_t;
-#ifdef _KERNEL
-typedef struct zfs_create_data {
- cred_t *zc_cred;
- dev_t zc_dev;
- nvlist_t *zc_props;
-} zfs_create_data_t;
-#endif
-
#define ZVOL_MAX_MINOR (1 << 16)
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
#ifdef _KERNEL
-extern int zfs_secpolicy_write(const char *dataset, cred_t *cr);
+typedef struct zfs_creat {
+ nvlist_t *zct_zplprops;
+ nvlist_t *zct_props;
+} zfs_creat_t;
+
+extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
+extern int zfs_secpolicy_rename_perms(const char *from,
+ const char *to, cred_t *cr);
+extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
extern int zfs_unmount_snap(char *, void *);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
index aa82cc178091..8d53c02b77aa 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,6 +31,8 @@
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
+#include <sys/rrwlock.h>
+#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
@@ -46,35 +48,50 @@ struct zfsvfs {
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
uint64_t z_max_blksz; /* maximum block size for files */
uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
+ uint64_t z_fuid_obj; /* fuid table object number */
+ uint64_t z_fuid_size; /* fuid table size */
+ avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
+ avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
+ krwlock_t z_fuid_lock; /* fuid lock */
+ boolean_t z_fuid_loaded; /* fuid tables are loaded */
+ struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
zilog_t *z_log; /* intent log pointer */
uint_t z_acl_mode; /* acl chmod/mode behavior */
uint_t z_acl_inherit; /* acl inheritance behavior */
+ zfs_case_t z_case; /* case-sense */
+ boolean_t z_utf8; /* utf8-only */
+ int z_norm; /* normalization flags */
boolean_t z_atime; /* enable atimes mount option */
- boolean_t z_unmounted1; /* unmounted phase 1 */
- boolean_t z_unmounted2; /* unmounted phase 2 */
- uint32_t z_op_cnt; /* vnode/vfs operations ref count */
- krwlock_t z_um_lock; /* rw lock for umount phase 2 */
+ boolean_t z_unmounted; /* unmounted */
+ rrwlock_t z_teardown_lock;
+ krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
vnode_t *z_ctldir; /* .zfs directory pointer */
boolean_t z_show_ctldir; /* expose .zfs in the root dir */
boolean_t z_issnap; /* true if this is a snapshot */
+ boolean_t z_vscan; /* virus scan on/off */
+ boolean_t z_use_fuids; /* version allows fuids */
+ kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
+ uint64_t z_version; /* ZPL version */
#define ZFS_OBJ_MTX_SZ 64
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
};
/*
- * The total file ID size is limited to 12 bytes (including the length
- * field) in the NFSv2 protocol. For historical reasons, this same limit
- * is currently being imposed by the Solaris NFSv3 implementation...
- * although the protocol actually permits a maximum of 64 bytes. It will
- * not be possible to expand beyond 12 bytes without abandoning support
- * of NFSv2 and making some changes to the Solaris NFSv3 implementation.
+ * Normal filesystems (those not under .zfs/snapshot) have a total
+ * file ID size limited to 12 bytes (including the length field) due to
+ * NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical
+ * reasons, this same limit is being imposed by the Solaris NFSv3 implementation
+ * (although the NFSv3 protocol actually permits a maximum of 64 bytes). It
+ * is not possible to expand beyond 12 bytes without abandoning support
+ * of NFSv2.
*
- * For the time being, we will partition up the available space as follows:
+ * For normal filesystems, we partition up the available space as follows:
* 2 bytes fid length (required)
* 6 bytes object number (48 bits)
* 4 bytes generation number (32 bits)
+ *
* We reserve only 48 bits for the object number, as this is the limit
* currently defined and imposed by the DMU.
*/
@@ -84,6 +101,22 @@ typedef struct zfid_short {
uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */
} zfid_short_t;
+/*
+ * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
+ * (including the length field). This makes files under .zfs/snapshot
+ * accessible by NFSv3 and NFSv4, but not NFSv2.
+ *
+ * For files under .zfs/snapshot, we partition up the available space
+ * as follows:
+ * 2 bytes fid length (required)
+ * 6 bytes object number (48 bits)
+ * 4 bytes generation number (32 bits)
+ * 6 bytes objset id (48 bits)
+ * 4 bytes currently just zero (32 bits)
+ *
+ * We reserve only 48 bits for the object number and objset id, as these are
+ * the limits currently defined and imposed by the DMU.
+ */
typedef struct zfid_long {
zfid_short_t z_fid;
uint8_t zf_setid[6]; /* obj[i] = obj >> (8 * i) */
@@ -93,6 +126,12 @@ typedef struct zfid_long {
#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
+extern uint_t zfs_fsyncer_key;
+extern int zfs_super_owner;
+
+extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
+extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
index 6b2923298df2..a0cf44064970 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
@@ -19,19 +19,18 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
#define _SYS_FS_ZFS_ZNODE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef _KERNEL
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/zfs_vfsops.h>
+#include <sys/rrwlock.h>
#endif
#include <sys/zfs_acl.h>
#include <sys/zil.h>
@@ -41,34 +40,62 @@ extern "C" {
#endif
/*
- * Define special zfs pflags
+ * Additional file level attributes, that are stored
+ * in the upper half of zp_flags
*/
-#define ZFS_XATTR 0x1 /* is an extended attribute */
-#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
-#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
+#define ZFS_READONLY 0x0000000100000000
+#define ZFS_HIDDEN 0x0000000200000000
+#define ZFS_SYSTEM 0x0000000400000000
+#define ZFS_ARCHIVE 0x0000000800000000
+#define ZFS_IMMUTABLE 0x0000001000000000
+#define ZFS_NOUNLINK 0x0000002000000000
+#define ZFS_APPENDONLY 0x0000004000000000
+#define ZFS_NODUMP 0x0000008000000000
+#define ZFS_OPAQUE 0x0000010000000000
+#define ZFS_AV_QUARANTINED 0x0000020000000000
+#define ZFS_AV_MODIFIED 0x0000040000000000
+
+#define ZFS_ATTR_SET(zp, attr, value) \
+{ \
+ if (value) \
+ zp->z_phys->zp_flags |= attr; \
+ else \
+ zp->z_phys->zp_flags &= ~attr; \
+}
-#define MASTER_NODE_OBJ 1
+/*
+ * Define special zfs pflags
+ */
+#define ZFS_XATTR 0x1 /* is an extended attribute */
+#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
+#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
+#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */
+#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */
+#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
+#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
+#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
/*
- * special attributes for master node.
+ * Is ID ephemeral?
*/
+#define IS_EPHEMERAL(x) (x > MAXUID)
-#define ZFS_FSID "FSID"
-#define ZFS_UNLINKED_SET "DELETE_QUEUE"
-#define ZFS_ROOT_OBJ "ROOT"
-#define ZPL_VERSION_OBJ "VERSION"
-#define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE"
-#define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS"
+/*
+ * Should we use FUIDs?
+ */
+#define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID &&\
+ spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
-#define ZFS_FLAG_BLOCKPERPAGE 0x1
-#define ZFS_FLAG_NOGROWBLOCKS 0x2
+#define MASTER_NODE_OBJ 1
/*
- * ZPL version - rev'd whenever an incompatible on-disk format change
- * occurs. Independent of SPA/DMU/ZAP versioning.
+ * Special attributes for master node.
*/
-
-#define ZPL_VERSION 1ULL
+#define ZFS_FSID "FSID"
+#define ZFS_UNLINKED_SET "DELETE_QUEUE"
+#define ZFS_ROOT_OBJ "ROOT"
+#define ZPL_VERSION_STR "VERSION"
+#define ZFS_FUID_TABLES "FUID"
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
@@ -83,14 +110,20 @@ extern "C" {
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
/*
+ * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
+ * the directory entries.
+ */
+#ifndef IFTODT
+#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
+#endif
+
+/*
* The directory entry has the type (currently unused on Solaris) in the
* top 4 bits, and the object number in the low 48 bits. The "middle"
* 12 bits are unused.
*/
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
-#define ZFS_DIRENT_MAKE(type, obj) (((uint64_t)type << 60) | obj)
-
/*
* This is the persistent portion of the znode. It is stored
@@ -112,8 +145,9 @@ typedef struct znode_phys {
uint64_t zp_flags; /* 120 - persistent flags */
uint64_t zp_uid; /* 128 - file owner */
uint64_t zp_gid; /* 136 - owning group */
- uint64_t zp_pad[4]; /* 144 - future */
- zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */
+ uint64_t zp_zap; /* 144 - extra attributes */
+ uint64_t zp_pad[3]; /* 152 - future */
+ zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
/*
* Data may pad out any remaining bytes in the znode buffer, eg:
*
@@ -121,7 +155,9 @@ typedef struct znode_phys {
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
* |<---- znode (264) ---->|<---- data (56) ---->|
*
- * At present, we only use this space to store symbolic links.
+ * At present, we use this space for the following:
+ * - symbolic links
+ * - 32-byte anti-virus scanstamp (regular files only)
*/
} znode_phys_t;
@@ -153,12 +189,12 @@ typedef struct znode {
avl_tree_t z_range_avl; /* avl tree of file range locks */
uint8_t z_unlinked; /* file has been unlinked */
uint8_t z_atime_dirty; /* atime needs to be synced */
- uint8_t z_dbuf_held; /* Is z_dbuf already held? */
uint8_t z_zn_prefetch; /* Prefetch znodes? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
uint64_t z_mapcnt; /* number of pages mapped to file */
uint64_t z_last_itx; /* last ZIL itx on this znode */
+ uint64_t z_gen; /* generation (same as zp_gen) */
uint32_t z_sync_cnt; /* synchronous open count */
kmutex_t z_acl_lock; /* acl data lock */
list_node_t z_link_node; /* all znodes in fs link */
@@ -167,6 +203,8 @@ typedef struct znode {
*/
znode_phys_t *z_phys; /* pointer to persistent znode */
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
+ /* FreeBSD-specific field. */
+ struct task z_task;
} znode_t;
@@ -195,42 +233,51 @@ typedef struct znode {
/*
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
* ZFS_EXIT() must be called before exitting the vop.
+ * ZFS_VERIFY_ZP() verifies the znode is valid.
*/
#define ZFS_ENTER(zfsvfs) \
{ \
- atomic_add_32(&(zfsvfs)->z_op_cnt, 1); \
- if ((zfsvfs)->z_unmounted1) { \
+ rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
+ if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \
} \
}
-#define ZFS_EXIT(zfsvfs) atomic_add_32(&(zfsvfs)->z_op_cnt, -1)
+
+#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
+
+#define ZFS_VERIFY_ZP(zp) \
+ if ((zp)->z_dbuf == NULL) { \
+ ZFS_EXIT((zp)->z_zfsvfs); \
+ return (EIO); \
+ } \
/*
* Macros for dealing with dmu_buf_hold
*/
-#define ZFS_OBJ_HASH(obj_num) (obj_num & (ZFS_OBJ_MTX_SZ - 1))
-#define ZFS_OBJ_MUTEX(zp) \
- (&zp->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH(zp->z_id)])
+#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
+#define ZFS_OBJ_MUTEX(zfsvfs, obj_num) \
+ (&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
- mutex_enter(&zfsvfs->z_hold_mtx[ZFS_OBJ_HASH(obj_num)]);
-
+ mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
+#define ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
+ mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
- mutex_exit(&zfsvfs->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
+ mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
/*
* Macros to encode/decode ZFS stored time values from/to struct timespec
*/
#define ZFS_TIME_ENCODE(tp, stmp) \
{ \
- stmp[0] = (uint64_t)(tp)->tv_sec; \
- stmp[1] = (uint64_t)(tp)->tv_nsec; \
+ (stmp)[0] = (uint64_t)(tp)->tv_sec; \
+ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \
}
#define ZFS_TIME_DECODE(tp, stmp) \
{ \
- (tp)->tv_sec = (time_t)stmp[0]; \
- (tp)->tv_nsec = (long)stmp[1]; \
+ (tp)->tv_sec = (time_t)(stmp)[0]; \
+ (tp)->tv_nsec = (long)(stmp)[1]; \
}
/*
@@ -244,9 +291,10 @@ typedef struct znode {
if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
zfs_time_stamper(zp, ACCESSED, NULL)
-extern int zfs_init_fs(zfsvfs_t *, znode_t **, cred_t *);
+extern int zfs_init_fs(zfsvfs_t *, znode_t **);
extern void zfs_set_dataprop(objset_t *);
-extern void zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx);
+extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
+ dmu_tx_t *tx);
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
@@ -254,33 +302,43 @@ extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
+extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void zfs_znode_free(znode_t *);
extern void zfs_remove_op_tables();
extern int zfs_create_op_tables();
extern dev_t zfs_cmpldev(uint64_t);
-
-extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name);
-extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
+extern int zfs_set_version(const char *name, uint64_t newvers);
+extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
+extern void zfs_znode_dmu_fini(znode_t *);
+
+extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+ znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
+ vattr_t *vap);
+extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
+ vattr_t *vap);
+extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, char *name);
-extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name);
-extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, char *link);
-extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, uint64_t off, uint64_t len);
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, vattr_t *vap, uint_t mask_applied);
+ znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
#ifndef ZFS_NO_ACL
-extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, int aclcnt, ace_t *z_ace);
+extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
+ vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
#endif
+extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
+extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern zil_get_data_t zfs_get_data;
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
index 947ba9fa6076..4d02d14f7075 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIL_H
#define _SYS_ZIL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/spa.h>
#include <sys/zio.h>
@@ -88,22 +86,61 @@ typedef struct zil_trailer {
#define ZIL_ZC_OBJSET 2
#define ZIL_ZC_SEQ 3
+typedef enum zil_create {
+ Z_FILE,
+ Z_DIR,
+ Z_XATTRDIR,
+} zil_create_t;
+
+/*
+ * size of xvattr log section.
+ * its composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps
+ * for create time and a single 64 bit integer for all of the attributes,
+ * and 4 64 bit integers (32 bytes) for the scanstamp.
+ *
+ */
+
+#define ZIL_XVAT_SIZE(mapsize) \
+ sizeof (lr_attr_t) + (sizeof (uint32_t) * (mapsize - 1)) + \
+ (sizeof (uint64_t) * 7)
+
+/*
+ * Size of ACL in log. The ACE data is padded out to properly align
+ * on 8 byte boundary.
+ */
+
+#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t)))
+
/*
* Intent log transaction types and record structures
*/
-#define TX_CREATE 1 /* Create file */
-#define TX_MKDIR 2 /* Make directory */
-#define TX_MKXATTR 3 /* Make XATTR directory */
-#define TX_SYMLINK 4 /* Create symbolic link to a file */
-#define TX_REMOVE 5 /* Remove file */
-#define TX_RMDIR 6 /* Remove directory */
-#define TX_LINK 7 /* Create hard link to a file */
-#define TX_RENAME 8 /* Rename a file */
-#define TX_WRITE 9 /* File write */
-#define TX_TRUNCATE 10 /* Truncate a file */
-#define TX_SETATTR 11 /* Set file attributes */
-#define TX_ACL 12 /* Set acl */
-#define TX_MAX_TYPE 13 /* Max transaction type */
+#define TX_CREATE 1 /* Create file */
+#define TX_MKDIR 2 /* Make directory */
+#define TX_MKXATTR 3 /* Make XATTR directory */
+#define TX_SYMLINK 4 /* Create symbolic link to a file */
+#define TX_REMOVE 5 /* Remove file */
+#define TX_RMDIR 6 /* Remove directory */
+#define TX_LINK 7 /* Create hard link to a file */
+#define TX_RENAME 8 /* Rename a file */
+#define TX_WRITE 9 /* File write */
+#define TX_TRUNCATE 10 /* Truncate a file */
+#define TX_SETATTR 11 /* Set file attributes */
+#define TX_ACL_V0 12 /* Set old formatted ACL */
+#define TX_ACL 13 /* Set ACL */
+#define TX_CREATE_ACL 14 /* create with ACL */
+#define TX_CREATE_ATTR 15 /* create + attrs */
+#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */
+#define TX_MKDIR_ACL 17 /* mkdir with ACL */
+#define TX_MKDIR_ATTR 18 /* mkdir with attr */
+#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
+#define TX_MAX_TYPE 20 /* Max transaction type */
+
+/*
+ * The transactions for mkdir, symlink, remove, rmdir, link, and rename
+ * may have the following bit set, indicating the original request
+ * specified case-insensitive handling of names.
+ */
+#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */
/*
* Format of log records.
@@ -124,6 +161,23 @@ typedef struct { /* common log record header */
uint64_t lrc_seq; /* see comment above */
} lr_t;
+/*
+ * Handle option extended vattr attributes.
+ *
+ * Whenever new attributes are added the version number
+ * will need to be updated as will code in
+ * zfs_log.c and zfs_replay.c
+ */
+typedef struct {
+ uint32_t lr_attr_masksize; /* number of elements in array */
+ uint32_t lr_attr_bitmap; /* First entry of array */
+ /* remainder of array and any additional fields */
+} lr_attr_t;
+
+/*
+ * log record for creates without optional ACL.
+ * This log record does support optional xvattr_t attributes.
+ */
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* object id of directory */
@@ -136,8 +190,42 @@ typedef struct {
uint64_t lr_rdev; /* rdev of object to create */
/* name of object to create follows this */
/* for symlinks, link content follows name */
+ /* for creates with xvattr data, the name follows the xvattr info */
} lr_create_t;
+/*
+ * FUID ACL record will be an array of ACEs from the original ACL.
+ * If this array includes ephemeral IDs, the record will also include
+ * an array of log-specific FUIDs to replace the ephemeral IDs.
+ * Only one copy of each unique domain will be present, so the log-specific
+ * FUIDs will use an index into a compressed domain table. On replay this
+ * information will be used to construct real FUIDs (and bypass idmap,
+ * since it may not be available).
+ */
+
+/*
+ * Log record for creates with optional ACL
+ * This log record is also used for recording any FUID
+ * information needed for replaying the create. If the
+ * file doesn't have any actual ACEs then the lr_aclcnt
+ * would be zero.
+ */
+typedef struct {
+ lr_create_t lr_create; /* common create portion */
+ uint64_t lr_aclcnt; /* number of ACEs in ACL */
+ uint64_t lr_domcnt; /* number of unique domains */
+ uint64_t lr_fuidcnt; /* number of real fuids */
+ uint64_t lr_acl_bytes; /* number of bytes in ACL */
+ uint64_t lr_acl_flags; /* ACL flags */
+ /* lr_acl_bytes number of variable sized ace's follows */
+ /* if create is also setting xvattr's, then acl data follows xvattr */
+ /* if ACE FUIDs are needed then they will follow the xvattr_t */
+ /* Following the FUIDs will be the domain table information. */
+ /* The FUIDs for the owner and group will be in the lr_create */
+ /* portion of the record. */
+ /* name follows ACL data */
+} lr_acl_create_t;
+
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* obj id of directory */
@@ -185,6 +273,7 @@ typedef struct {
uint64_t lr_size; /* size to set */
uint64_t lr_atime[2]; /* access time */
uint64_t lr_mtime[2]; /* modification time */
+ /* optional attribute lr_attr_t may be here */
} lr_setattr_t;
typedef struct {
@@ -192,6 +281,17 @@ typedef struct {
uint64_t lr_foid; /* obj id of file */
uint64_t lr_aclcnt; /* number of acl entries */
/* lr_aclcnt number of ace_t entries follow this */
+} lr_acl_v0_t;
+
+typedef struct {
+ lr_t lr_common; /* common portion of log record */
+ uint64_t lr_foid; /* obj id of file */
+ uint64_t lr_aclcnt; /* number of ACEs in ACL */
+ uint64_t lr_domcnt; /* number of unique domains */
+ uint64_t lr_fuidcnt; /* number of real fuids */
+ uint64_t lr_acl_bytes; /* number of bytes in ACL */
+ uint64_t lr_acl_flags; /* ACL flags */
+ /* lr_acl_bytes number of variable sized ace's follows */
} lr_acl_t;
/*
@@ -213,6 +313,7 @@ typedef struct itx {
void *itx_private; /* type-specific opaque data */
itx_wr_state_t itx_wr_state; /* write state */
uint8_t itx_sync; /* synchronous transaction */
+ uint64_t itx_sod; /* record size on disk */
lr_t itx_lr; /* common part of log record */
/* followed by type-specific part of lr_xx_t and its immediate data */
} itx_t;
@@ -234,6 +335,7 @@ typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
uint64_t txg);
typedef int zil_replay_func_t();
+typedef void zil_replay_cleaner_t();
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
@@ -249,15 +351,19 @@ extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
extern void zil_close(zilog_t *zilog);
extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
- zil_replay_func_t *replay_func[TX_MAX_TYPE]);
+ zil_replay_func_t *replay_func[TX_MAX_TYPE],
+ zil_replay_cleaner_t *replay_cleaner);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
+extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
-extern itx_t *zil_itx_create(int txtype, size_t lrsize);
+extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
extern int zil_claim(char *osname, void *txarg);
+extern int zil_check_log_chain(char *osname, void *txarg);
+extern int zil_clear_log_chain(char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog);
extern int zil_is_committed(zilog_t *zilog);
@@ -265,7 +371,7 @@ extern int zil_is_committed(zilog_t *zilog);
extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
-extern void zil_add_vdev(zilog_t *zilog, uint64_t vdev);
+extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
extern int zil_disable;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
index 3ecf4e4debf5..0fc800b96dea 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -51,15 +51,13 @@ typedef struct lwb {
} lwb_t;
/*
- * Vdev flushing: We use a bit map of size ZIL_VDEV_BMAP bytes.
- * Any vdev numbers beyond that use a linked list of zil_vdev_t structures.
+ * Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
+ * we've touched so we know which ones need a write cache flush at the end.
*/
-
-#define ZIL_VDEV_BMSZ 16 /* 16 * 8 = 128 vdevs */
-typedef struct zil_vdev {
- uint64_t vdev; /* device written */
- list_node_t vdev_seq_node; /* zilog->zl_vdev_list linkage */
-} zil_vdev_t;
+typedef struct zil_vdev_node {
+ uint64_t zv_vdev; /* vdev to be flushed */
+ avl_node_t zv_node; /* AVL tree linkage */
+} zil_vdev_node_t;
/*
* Stable storage intent log management structure. One per dataset.
@@ -91,8 +89,8 @@ struct zilog {
uint64_t zl_cur_used; /* current commit log size used */
uint64_t zl_prev_used; /* previous commit log size used */
list_t zl_lwb_list; /* in-flight log write list */
- list_t zl_vdev_list; /* list of [vdev, seq] pairs */
- uint8_t zl_vdev_bmap[ZIL_VDEV_BMSZ]; /* bitmap of vdevs */
+ kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
+ avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index b026ae6450c6..6331567498b5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -20,20 +20,17 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZIO_H
#define _ZIO_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/avl.h>
-#include <sys/dkio.h>
#include <sys/fs/zfs.h>
#include <sys/zio_impl.h>
@@ -60,10 +57,6 @@ typedef struct zio_block_tail {
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
sizeof (uint64_t))
-#define ZIO_GET_IOSIZE(zio) \
- (BP_IS_GANG((zio)->io_bp) ? \
- SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp))
-
typedef struct zio_gbh {
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
uint64_t zg_filler[SPA_GBH_FILLER];
@@ -107,6 +100,10 @@ enum zio_compress {
#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
+#define ZIO_FAILURE_MODE_WAIT 0
+#define ZIO_FAILURE_MODE_CONTINUE 1
+#define ZIO_FAILURE_MODE_PANIC 2
+
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
@@ -121,51 +118,70 @@ enum zio_compress {
#define ZIO_FLAG_MUSTSUCCEED 0x00000
#define ZIO_FLAG_CANFAIL 0x00001
-#define ZIO_FLAG_FAILFAST 0x00002
-#define ZIO_FLAG_CONFIG_HELD 0x00004
-#define ZIO_FLAG_CONFIG_GRABBED 0x00008
+#define ZIO_FLAG_SPECULATIVE 0x00002
+#define ZIO_FLAG_CONFIG_WRITER 0x00004
+#define ZIO_FLAG_DONT_RETRY 0x00008
#define ZIO_FLAG_DONT_CACHE 0x00010
#define ZIO_FLAG_DONT_QUEUE 0x00020
-#define ZIO_FLAG_DONT_PROPAGATE 0x00040
-#define ZIO_FLAG_DONT_RETRY 0x00080
-
-#define ZIO_FLAG_PHYSICAL 0x00100
-#define ZIO_FLAG_IO_BYPASS 0x00200
-#define ZIO_FLAG_IO_REPAIR 0x00400
-#define ZIO_FLAG_SPECULATIVE 0x00800
+#define ZIO_FLAG_DONT_AGGREGATE 0x00040
+#define ZIO_FLAG_DONT_PROPAGATE 0x00080
-#define ZIO_FLAG_RESILVER 0x01000
-#define ZIO_FLAG_SCRUB 0x02000
-#define ZIO_FLAG_SCRUB_THREAD 0x04000
-#define ZIO_FLAG_SUBBLOCK 0x08000
+#define ZIO_FLAG_IO_BYPASS 0x00100
+#define ZIO_FLAG_IO_REPAIR 0x00200
+#define ZIO_FLAG_IO_RETRY 0x00400
+#define ZIO_FLAG_IO_REWRITE 0x00800
-#define ZIO_FLAG_NOBOOKMARK 0x10000
-#define ZIO_FLAG_USER 0x20000
+#define ZIO_FLAG_PROBE 0x01000
+#define ZIO_FLAG_RESILVER 0x02000
+#define ZIO_FLAG_SCRUB 0x04000
+#define ZIO_FLAG_SCRUB_THREAD 0x08000
-#define ZIO_FLAG_METADATA 0x40000
+#define ZIO_FLAG_GANG_CHILD 0x10000
#define ZIO_FLAG_GANG_INHERIT \
(ZIO_FLAG_CANFAIL | \
- ZIO_FLAG_FAILFAST | \
- ZIO_FLAG_CONFIG_HELD | \
- ZIO_FLAG_DONT_RETRY | \
- ZIO_FLAG_IO_REPAIR | \
ZIO_FLAG_SPECULATIVE | \
+ ZIO_FLAG_CONFIG_WRITER | \
+ ZIO_FLAG_DONT_RETRY | \
+ ZIO_FLAG_DONT_CACHE | \
+ ZIO_FLAG_DONT_AGGREGATE | \
ZIO_FLAG_RESILVER | \
ZIO_FLAG_SCRUB | \
ZIO_FLAG_SCRUB_THREAD)
#define ZIO_FLAG_VDEV_INHERIT \
(ZIO_FLAG_GANG_INHERIT | \
- ZIO_FLAG_DONT_CACHE | \
- ZIO_FLAG_PHYSICAL)
+ ZIO_FLAG_IO_REPAIR | \
+ ZIO_FLAG_IO_RETRY | \
+ ZIO_FLAG_PROBE)
+
+#define ZIO_PIPELINE_CONTINUE 0x100
+#define ZIO_PIPELINE_STOP 0x101
+
+#define ZIO_GANG_CHILD_FLAGS(zio) \
+ (((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \
+ ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL)
+
+enum zio_child {
+ ZIO_CHILD_VDEV = 0,
+ ZIO_CHILD_GANG,
+ ZIO_CHILD_LOGICAL,
+ ZIO_CHILD_TYPES
+};
+
+enum zio_wait_type {
+ ZIO_WAIT_READY = 0,
+ ZIO_WAIT_DONE,
+ ZIO_WAIT_TYPES
+};
/*
- * We'll take the EILSEQ (Illegal byte sequence) errno
- * to indicate checksum errors.
+ * We'll take the EILSEQ and ENOMSG to indicate checksum errors and
+ * fragmentation.
*/
#define ECKSUM EILSEQ
+#define EFRAGS ENOMSG
typedef struct zio zio_t;
typedef void zio_done_func_t(zio_t *zio);
@@ -200,23 +216,64 @@ typedef struct zbookmark {
uint64_t zb_blkid;
} zbookmark_t;
+typedef struct zio_prop {
+ enum zio_checksum zp_checksum;
+ enum zio_compress zp_compress;
+ dmu_object_type_t zp_type;
+ uint8_t zp_level;
+ uint8_t zp_ndvas;
+} zio_prop_t;
+
+typedef struct zio_gang_node {
+ zio_gbh_phys_t *gn_gbh;
+ struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
+} zio_gang_node_t;
+
+typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
+ zio_gang_node_t *gn, void *data);
+
+typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
+
+typedef struct zio_transform {
+ void *zt_orig_data;
+ uint64_t zt_orig_size;
+ uint64_t zt_bufsize;
+ zio_transform_func_t *zt_transform;
+ struct zio_transform *zt_next;
+} zio_transform_t;
+
+typedef int zio_pipe_stage_t(zio_t *zio);
+
+/*
+ * The io_reexecute flags are distinct from io_flags because the child must
+ * be able to propagate them to the parent. The normal io_flags are local
+ * to the zio, not protected by any lock, and not modifiable by children;
+ * the reexecute flags are protected by io_lock, modifiable by children,
+ * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set.
+ */
+#define ZIO_REEXECUTE_NOW 0x01
+#define ZIO_REEXECUTE_SUSPEND 0x02
+
struct zio {
/* Core information about this I/O */
- zio_t *io_parent;
- zio_t *io_root;
- spa_t *io_spa;
zbookmark_t io_bookmark;
- enum zio_checksum io_checksum;
- enum zio_compress io_compress;
- int io_ndvas;
+ zio_prop_t io_prop;
+ zio_type_t io_type;
+ enum zio_child io_child_type;
+ int io_cmd;
+ uint8_t io_priority;
+ uint8_t io_reexecute;
+ uint8_t io_async_root;
uint64_t io_txg;
+ spa_t *io_spa;
blkptr_t *io_bp;
blkptr_t io_bp_copy;
+ zio_t *io_parent;
zio_t *io_child;
zio_t *io_sibling_prev;
zio_t *io_sibling_next;
- zio_transform_t *io_transform_stack;
zio_t *io_logical;
+ zio_transform_t *io_transform_stack;
/* Callback info */
zio_done_func_t *io_ready;
@@ -231,9 +288,9 @@ struct zio {
/* Stuff for the vdev stack */
vdev_t *io_vd;
void *io_vsd;
+ zio_done_func_t *io_vsd_free;
uint64_t io_offset;
uint64_t io_deadline;
- uint64_t io_timestamp;
avl_node_t io_offset_node;
avl_node_t io_deadline_node;
avl_tree_t *io_vdev_tree;
@@ -242,19 +299,17 @@ struct zio {
/* Internal pipeline state */
int io_flags;
- enum zio_type io_type;
- enum zio_stage io_stage;
- uint8_t io_stalled;
- uint8_t io_priority;
- struct dk_callback io_dk_callback;
- int io_cmd;
- int io_retries;
- int io_error;
- uint32_t io_numerrors;
+ zio_stage_t io_stage;
uint32_t io_pipeline;
- uint32_t io_async_stages;
- uint64_t io_children_notready;
- uint64_t io_children_notdone;
+ int io_orig_flags;
+ zio_stage_t io_orig_stage;
+ uint32_t io_orig_pipeline;
+ int io_error;
+ int io_child_error[ZIO_CHILD_TYPES];
+ uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
+ uint64_t *io_stall;
+ zio_gang_node_t *io_gang_tree;
+ void *io_executor;
void *io_waiter;
kmutex_t io_lock;
kcondvar_t io_cv;
@@ -269,76 +324,76 @@ extern zio_t *zio_null(zio_t *pio, spa_t *spa,
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, int flags);
-extern zio_t *zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data,
+extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
- int priority, int flags, zbookmark_t *zb);
+ int priority, int flags, const zbookmark_t *zb);
-extern zio_t *zio_write(zio_t *pio, spa_t *spa, int checksum, int compress,
- int ncopies, uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *ready, zio_done_func_t *done, void *private, int priority,
- int flags, zbookmark_t *zb);
+extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
+ void *data, uint64_t size, zio_prop_t *zp,
+ zio_done_func_t *ready, zio_done_func_t *done, void *private,
+ int priority, int flags, const zbookmark_t *zb);
-extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, int checksum,
- uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *done, void *private, int priority, int flags,
- zbookmark_t *zb);
+extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
+ void *data, uint64_t size, zio_done_func_t *done, void *private,
+ int priority, int flags, zbookmark_t *zb);
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private);
+ zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private);
+ zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, int priority, int flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
- zio_done_func_t *done, void *private, int priority, int flags);
+ zio_done_func_t *done, void *private, int priority, int flags,
+ boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
- zio_done_func_t *done, void *private, int priority, int flags);
+ zio_done_func_t *done, void *private, int priority, int flags,
+ boolean_t labels);
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t txg);
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
+extern void zio_flush(zio_t *zio, vdev_t *vd);
extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
+extern void zio_execute(zio_t *zio);
+extern void zio_interrupt(zio_t *zio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
-/*
- * Move an I/O to the next stage of the pipeline and execute that stage.
- * There's no locking on io_stage because there's no legitimate way for
- * multiple threads to be attempting to process the same I/O.
- */
-extern void zio_next_stage(zio_t *zio);
-extern void zio_next_stage_async(zio_t *zio);
-extern void zio_wait_children_done(zio_t *zio);
+extern void zio_resubmit_stage_async(void *);
-/*
- * Delegate I/O to a child vdev.
- */
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
uint64_t offset, void *data, uint64_t size, int type, int priority,
int flags, zio_done_func_t *done, void *private);
+extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
+ void *data, uint64_t size, int type, int priority,
+ int flags, zio_done_func_t *done, void *private);
+
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);
extern void zio_checksum_verified(zio_t *zio);
-extern void zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp);
+extern int zio_worst_error(int e1, int e2);
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
-boolean_t zio_should_retry(zio_t *zio);
+extern void zio_suspend(spa_t *spa, zio_t *zio);
+extern void zio_resume(spa_t *spa);
+extern void zio_resume_wait(spa_t *spa);
/*
* Initial setup and teardown.
@@ -358,6 +413,7 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen,
extern int zio_clear_fault(int id);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, int error);
+extern int zio_handle_label_injection(zio_t *zio, int error);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
index bb7bd41e0bb3..da407399da06 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIO_CHECKSUM_H
#define _SYS_ZIO_CHECKSUM_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zio.h>
#ifdef __cplusplus
@@ -64,7 +62,7 @@ extern zio_checksum_t fletcher_4_incremental_byteswap;
extern zio_checksum_t zio_checksum_SHA256;
-extern void zio_checksum(uint_t checksum, zio_cksum_t *zcp,
+extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
void *data, uint64_t size);
extern int zio_checksum_error(zio_t *zio);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
index d2ddbc34e922..e7503b733cc0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZIO_IMPL_H
#define _ZIO_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zfs_context.h>
#include <sys/zio.h>
@@ -38,162 +36,102 @@ extern "C" {
/*
* I/O Groups: pipeline stage definitions.
*/
-
typedef enum zio_stage {
ZIO_STAGE_OPEN = 0, /* RWFCI */
- ZIO_STAGE_WAIT_CHILDREN_READY, /* RWFCI */
- ZIO_STAGE_WRITE_COMPRESS, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
+ ZIO_STAGE_ISSUE_ASYNC, /* -W--- */
- ZIO_STAGE_GANG_PIPELINE, /* -WFC- */
+ ZIO_STAGE_READ_BP_INIT, /* R---- */
+ ZIO_STAGE_WRITE_BP_INIT, /* -W--- */
+
+ ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
- ZIO_STAGE_GET_GANG_HEADER, /* -WFC- */
- ZIO_STAGE_REWRITE_GANG_MEMBERS, /* -W--- */
- ZIO_STAGE_FREE_GANG_MEMBERS, /* --F-- */
- ZIO_STAGE_CLAIM_GANG_MEMBERS, /* ---C- */
+ ZIO_STAGE_GANG_ASSEMBLE, /* RWFC- */
+ ZIO_STAGE_GANG_ISSUE, /* RWFC- */
ZIO_STAGE_DVA_ALLOCATE, /* -W--- */
ZIO_STAGE_DVA_FREE, /* --F-- */
ZIO_STAGE_DVA_CLAIM, /* ---C- */
- ZIO_STAGE_GANG_CHECKSUM_GENERATE, /* -W--- */
-
ZIO_STAGE_READY, /* RWFCI */
ZIO_STAGE_VDEV_IO_START, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS, /* RW--I */
- ZIO_STAGE_WAIT_CHILDREN_DONE, /* RWFCI */
-
ZIO_STAGE_CHECKSUM_VERIFY, /* R---- */
- ZIO_STAGE_READ_GANG_MEMBERS, /* R---- */
- ZIO_STAGE_READ_DECOMPRESS, /* R---- */
- ZIO_STAGE_DONE /* RWFCI */
+ ZIO_STAGE_DONE, /* RWFCI */
+ ZIO_STAGES
} zio_stage_t;
-/*
- * The stages for which there's some performance value in going async.
- * When compression is enabled, ZIO_STAGE_WRITE_COMPRESS is ORed in as well.
- */
-#define ZIO_ASYNC_PIPELINE_STAGES \
- ((1U << ZIO_STAGE_CHECKSUM_GENERATE) | \
- (1U << ZIO_STAGE_VDEV_IO_DONE) | \
- (1U << ZIO_STAGE_CHECKSUM_VERIFY) | \
- (1U << ZIO_STAGE_READ_DECOMPRESS))
+#define ZIO_INTERLOCK_STAGES \
+ ((1U << ZIO_STAGE_READY) | \
+ (1U << ZIO_STAGE_DONE))
-#define ZIO_VDEV_IO_PIPELINE \
+#define ZIO_INTERLOCK_PIPELINE \
+ ZIO_INTERLOCK_STAGES
+
+#define ZIO_VDEV_IO_STAGES \
((1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_VDEV_IO_DONE) | \
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
-#define ZIO_READ_PHYS_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_READY) | \
- ZIO_VDEV_IO_PIPELINE | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_CHECKSUM_VERIFY) | \
+#define ZIO_VDEV_CHILD_PIPELINE \
+ (ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_DONE))
-#define ZIO_READ_PIPELINE \
- ZIO_READ_PHYS_PIPELINE
+#define ZIO_READ_COMMON_STAGES \
+ (ZIO_INTERLOCK_STAGES | \
+ ZIO_VDEV_IO_STAGES | \
+ (1U << ZIO_STAGE_CHECKSUM_VERIFY))
-#define ZIO_WRITE_PHYS_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_CHECKSUM_GENERATE) | \
- (1U << ZIO_STAGE_READY) | \
- ZIO_VDEV_IO_PIPELINE | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
+#define ZIO_READ_PHYS_PIPELINE \
+ ZIO_READ_COMMON_STAGES
-#define ZIO_WRITE_COMMON_PIPELINE \
- ZIO_WRITE_PHYS_PIPELINE
+#define ZIO_READ_PIPELINE \
+ (ZIO_READ_COMMON_STAGES | \
+ (1U << ZIO_STAGE_READ_BP_INIT))
-#define ZIO_WRITE_PIPELINE \
- ((1U << ZIO_STAGE_WRITE_COMPRESS) | \
- ZIO_WRITE_COMMON_PIPELINE)
+#define ZIO_WRITE_COMMON_STAGES \
+ (ZIO_INTERLOCK_STAGES | \
+ ZIO_VDEV_IO_STAGES | \
+ (1U << ZIO_STAGE_ISSUE_ASYNC) | \
+ (1U << ZIO_STAGE_CHECKSUM_GENERATE))
-#define ZIO_GANG_STAGES \
- ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_REWRITE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_FREE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_CLAIM_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE) | \
- (1U << ZIO_STAGE_READ_GANG_MEMBERS))
+#define ZIO_WRITE_PHYS_PIPELINE \
+ ZIO_WRITE_COMMON_STAGES
#define ZIO_REWRITE_PIPELINE \
- ((1U << ZIO_STAGE_GANG_PIPELINE) | \
- (1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_REWRITE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE) | \
- ZIO_WRITE_COMMON_PIPELINE)
+ (ZIO_WRITE_COMMON_STAGES | \
+ (1U << ZIO_STAGE_WRITE_BP_INIT))
-#define ZIO_WRITE_ALLOCATE_PIPELINE \
- ((1U << ZIO_STAGE_DVA_ALLOCATE) | \
- ZIO_WRITE_COMMON_PIPELINE)
+#define ZIO_WRITE_PIPELINE \
+ (ZIO_WRITE_COMMON_STAGES | \
+ (1U << ZIO_STAGE_WRITE_BP_INIT) | \
+ (1U << ZIO_STAGE_DVA_ALLOCATE))
-#define ZIO_GANG_FREE_STAGES \
- ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_FREE_GANG_MEMBERS))
+#define ZIO_GANG_STAGES \
+ ((1U << ZIO_STAGE_GANG_ASSEMBLE) | \
+ (1U << ZIO_STAGE_GANG_ISSUE))
#define ZIO_FREE_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_GANG_PIPELINE) | \
- (1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_FREE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_DVA_FREE) | \
- (1U << ZIO_STAGE_READY) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
+ (ZIO_INTERLOCK_STAGES | \
+ (1U << ZIO_STAGE_DVA_FREE))
#define ZIO_CLAIM_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_GANG_PIPELINE) | \
- (1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_CLAIM_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_DVA_CLAIM) | \
- (1U << ZIO_STAGE_READY) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
+ (ZIO_INTERLOCK_STAGES | \
+ (1U << ZIO_STAGE_DVA_CLAIM))
#define ZIO_IOCTL_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_READY) | \
- ZIO_VDEV_IO_PIPELINE | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_WAIT_FOR_CHILDREN_PIPELINE \
- ((1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_READY) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_WAIT_FOR_CHILDREN_DONE_PIPELINE \
- ((1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
+ (ZIO_INTERLOCK_STAGES | \
+ (1U << ZIO_STAGE_VDEV_IO_START) | \
+ (1U << ZIO_STAGE_VDEV_IO_ASSESS))
-#define ZIO_VDEV_CHILD_PIPELINE \
- (ZIO_WAIT_FOR_CHILDREN_DONE_PIPELINE | \
- ZIO_VDEV_IO_PIPELINE)
-
-#define ZIO_ERROR_PIPELINE_MASK \
- ZIO_WAIT_FOR_CHILDREN_PIPELINE
-
-typedef struct zio_transform zio_transform_t;
-struct zio_transform {
- void *zt_data;
- uint64_t zt_size;
- uint64_t zt_bufsize;
- zio_transform_t *zt_next;
-};
+#define ZIO_CONFIG_LOCK_BLOCKING_STAGES \
+ ((1U << ZIO_STAGE_VDEV_IO_START) | \
+ (1U << ZIO_STAGE_DVA_ALLOCATE) | \
+ (1U << ZIO_STAGE_DVA_CLAIM))
extern void zio_inject_init(void);
extern void zio_inject_fini(void);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h
index df85824d59bd..2a6452aa433c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,17 +35,21 @@
extern "C" {
#endif
+#define ZVOL_OBJ 1ULL
+#define ZVOL_ZAP_OBJ 2ULL
+
#ifdef _KERNEL
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
-extern void zvol_create_cb(objset_t *os, void *arg, dmu_tx_t *tx);
-extern int zvol_create_minor(const char *, dev_t);
+extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
+extern int zvol_create_minor(const char *, major_t);
extern int zvol_remove_minor(const char *);
-extern int zvol_set_volsize(const char *, dev_t, uint64_t);
+extern int zvol_set_volsize(const char *, major_t, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
+extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
#ifndef __FreeBSD__
extern int zvol_strategy(buf_t *bp);