diff options
Diffstat (limited to 'sys/gnu/fs/xfs')
168 files changed, 110918 insertions, 0 deletions
diff --git a/sys/gnu/fs/xfs/FreeBSD/support/atomic.h b/sys/gnu/fs/xfs/FreeBSD/support/atomic.h new file mode 100644 index 000000000000..f8b6c9171ddc --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/atomic.h @@ -0,0 +1,37 @@ +#ifndef __XFS_SUPPORT_ATOMIC_H__ + +#include <sys/types.h> +#include <machine/atomic.h> + +typedef struct { + volatile unsigned int val; +} atomic_t; + +#define atomic_read(v) ((v)->val) +#define atomic_set(v, i) ((v)->val = (i)) + +#define atomic_add(i, v) atomic_add_int(&(v)->val, (i)) +#define atomic_inc(v) atomic_add_int(&(v)->val, 1) +#define atomic_dec(v) atomic_subtract_int(&(v)->val, 1) +#define atomic_sub(i, v) atomic_subtract_int(&(v)->val, (i)) +#define atomic_sub_and_test(i, v) (atomic_fetchadd_int(&(v)->val, (-i) == i) +#define atomic_dec_and_test(v) (atomic_fetchadd_int(&(v)->val, -1) == 1) + +/* + * This is used for two variables in XFS, one of which is a debug trace + * buffer index. + */ + +static __inline__ int atomicIncWithWrap(volatile unsigned int *ip, int val) +{ + unsigned int oldval, newval; + + do { + oldval = *ip; + newval = (oldval + 1 >= val) ? 0 : oldval + 1; + } while (atomic_cmpset_rel_int(ip, oldval, newval) == 0); + + return oldval; +} + +#endif /* __XFS_SUPPORT_ATOMIC_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/debug.c b/sys/gnu/fs/xfs/FreeBSD/support/debug.c new file mode 100644 index 000000000000..72345f0ef0b3 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/debug.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <machine/stdarg.h> + +#include <support/debug.h> + +SYSCTL_NODE(_debug, OID_AUTO, xfs, CTLFLAG_RD, 0, "XFS debug options"); + +static int verbosity = 10; +SYSCTL_INT(_debug_xfs, OID_AUTO, verbosity, CTLFLAG_RW, &verbosity, 0, ""); + +#ifdef DEBUG + +static int doass = 1; +SYSCTL_INT(_debug_xfs, OID_AUTO, assert, CTLFLAG_RW, &doass, 0, ""); + +void +assfail(char *a, char *f, int l) +{ + if (doass == 0) return; + panic("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l); +} + +int +get_thread_id(void) +{ + return curthread->td_proc->p_pid; +} + +#endif + +void +cmn_err(register int level, char *fmt, ...) 
+{ + char *fp = fmt; + char message[256]; + va_list ap; + + if (verbosity < level) + return; + + va_start(ap, fmt); + if (*fmt == '!') fp++; + vsprintf(message, fp, ap); + printf("%s\n", message); + va_end(ap); +} + + +void +icmn_err(register int level, char *fmt, va_list ap) +{ + char message[256]; + + if (verbosity < level) + return; + + vsprintf(message, fmt, ap); + printf("cmn_err level %d %s\n",level, message); +} + diff --git a/sys/gnu/fs/xfs/FreeBSD/support/debug.h b/sys/gnu/fs/xfs/FreeBSD/support/debug.h new file mode 100644 index 000000000000..6c82c84260d2 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/debug.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_DEBUG_H__ +#define __XFS_SUPPORT_DEBUG_H__ + +#include <machine/stdarg.h> + +#define CE_DEBUG 7 /* debug */ +#define CE_CONT 6 /* continuation */ +#define CE_NOTE 5 /* notice */ +#define CE_WARN 4 /* warning */ +#define CE_ALERT 1 /* alert */ +#define CE_PANIC 0 /* panic */ + +extern void icmn_err(int, char *, va_list); +extern void cmn_err(int, char *, ...); + +#ifndef STATIC +# define STATIC static +#endif + +#if defined(INVARIANTS) +# ifndef DEBUG +# define DEBUG +# endif +#endif + +#if defined(DEBUG) +# ifdef lint +# define ASSERT(EX) ((void)0) /* avoid "constant in conditional" babble */ +# else +# define ASSERT(EX) ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__)) +# endif /* lint */ +#else /* !DEBUG */ +# define ASSERT(x) ((void)0) +#endif /* !DEBUG */ + +#ifdef DEBUG +extern void assfail(char *, char *, int); +extern int get_thread_id(void); +#else +#define assfail(a, b, c) ((void)0) +#endif + +#define ASSERT_ALWAYS(EX) ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__)) +#define debug_stop_all_cpus(param) /* param is "cpumask_t *" */ + +#endif /* __XFS_SUPPORT_DEBUG_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/kdb.c b/sys/gnu/fs/xfs/FreeBSD/support/kdb.c new file mode 100644 index 000000000000..2d3aac6f390b --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/kdb.c @@ -0,0 +1,63 @@ +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> + +#include <machine/db_machdep.h> + +#include "opt_ddb.h" +#ifdef DDB +#include <ddb/ddb.h> +#else +#error "Must have options for KDB, DDB in kernel config" +#endif + +#include <support/kdb.h> + +DB_SET(xfs, xfs_ddb_cmd, db_cmd_set, CS_MORE, NULL) +{ + db_error("No commands registered.\n"); +} + + +int +kdb_register(char 
*cmd, kdb_func_t func, char *usage, char *help, short minlen) +{ + return 0; +} + +int +kdb_unregister(char *cmd) +{ + return 0; +} + +int +kdbgetaddrarg(int argc, const char **argv, int *nextarg, + kdb_machreg_t *value, long *offset, char **name, struct pt_regs *regs) +{ + return 0; +} + +int +kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) + +{ + return 0; +} + +void +kdb_printf(const char *fmt, ...) +{ +} + +int +kdb_getarea_size(void *res, unsigned long addr, size_t size) +{ + return 0; +} + +int +db_putarea_size(unsigned long addr, void *res, size_t size) +{ + return 0; +} diff --git a/sys/gnu/fs/xfs/FreeBSD/support/kdb.h b/sys/gnu/fs/xfs/FreeBSD/support/kdb.h new file mode 100644 index 000000000000..d85bd6de763c --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/kdb.h @@ -0,0 +1,44 @@ +#ifndef __XFS_SUPPORT_KGDB_H__ +#define __XFS_SUPPORT_KGDB_H__ + +#define KDB_ARGCOUNT EINVAL + +struct pt_regs +{ + int dummy; +}; + +#define MODULE_AUTHOR(s) static char __module_author[] = s; +#define MODULE_DESCRIPTION(s) static char __module_description[] = s; +#define MODULE_LICENSE(s) static char __module_license[] = s + + +typedef int (*kdb_func_t)(int, const char **, const char **, struct pt_regs *); +typedef register_t kdb_machreg_t; + +/* + * Symbol table format. + */ +typedef struct __ksymtab { + unsigned long value; /* Address of symbol */ + const char *sym_name; /* Full symbol name, including any version */ + unsigned long sym_start; + unsigned long sym_end; +} kdb_symtab_t; + +extern int kdb_register(char *, kdb_func_t, char *, char *, short); +extern int kdb_unregister(char *); + +extern int kdbgetaddrarg(int, const char**, int*, kdb_machreg_t *, + long *, char **, struct pt_regs *); +extern int kdbnearsym(unsigned long, kdb_symtab_t *); +extern void kdb_printf(const char *,...) 
+ __attribute__ ((format (printf, 1, 2))); + +extern int kdb_getarea_size(void *, unsigned long, size_t); +extern int kdb_putarea_size(unsigned long, void *, size_t); + +#define kdb_getarea(x,addr) kdb_getarea_size(&(x), addr, sizeof((x))) +#define kdb_putarea(addr,x) kdb_putarea_size(addr, &(x), sizeof((x))) + +#endif /* __XFS_SUPPORT_KGDB_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/kmem.c b/sys/gnu/fs/xfs/FreeBSD/support/kmem.c new file mode 100644 index 000000000000..c98e4bcb4d71 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/kmem.c @@ -0,0 +1,3 @@ +#include <support/kmem.h> + +MALLOC_DEFINE(M_XFS, "XFSALLOC", "XFS memory"); diff --git a/sys/gnu/fs/xfs/FreeBSD/support/kmem.h b/sys/gnu/fs/xfs/FreeBSD/support/kmem.h new file mode 100644 index 000000000000..f457302c062f --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/kmem.h @@ -0,0 +1,65 @@ +#ifndef __XFS_SUPPORT_KMEM_H__ +#define __XFS_SUPPORT_KMEM_H__ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <vm/uma.h> + +typedef unsigned long xfs_pflags_t; + +#define PFLAGS_TEST_NOIO() 0 +#define PFLAGS_TEST_FSTRANS() 0 + +#define PFLAGS_SET_NOIO(STATEP) do { \ +} while (0) + +#define PFLAGS_SET_FSTRANS(STATEP) do { \ +} while (0) + +#define PFLAGS_RESTORE(STATEP) do { \ +} while (0) + +#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ +} while (0) + +/* + * memory management routines + */ +#define KM_SLEEP M_WAITOK +#define KM_SLEEP_IO M_WAITOK +#define KM_NOFS M_WAITOK +#define KM_NOSLEEP M_NOWAIT +#define KM_CACHEALIGN 0 + +#define kmem_zone uma_zone + +typedef struct uma_zone kmem_zone_t; +typedef struct uma_zone xfs_zone_t; + +#define kmem_zone_init(len, name) \ + uma_zcreate(name, len, NULL, NULL, NULL, NULL, 0, 0) +#define kmem_zone_free(zone, ptr) \ + uma_zfree(zone, ptr) +#define kmem_cache_destroy(zone) \ + uma_zdestroy(zone) +#define kmem_zone_alloc(zone, flg) \ + uma_zalloc(zone, flg) +#define 
kmem_zone_zalloc(zone, flg) \ + uma_zalloc(zone, (flg) | M_ZERO) + +#define kmem_alloc(len, flg) \ + malloc(len, M_XFS, flg) +#define kmem_zalloc(len, flg) \ + malloc(len, M_XFS, (flg) | M_ZERO) +#define kmem_free(ptr, size) \ + free(ptr, M_XFS) +#define kmem_realloc(ptr, nsize, osize, flg) \ + realloc(ptr, nsize, M_XFS, flg) + +MALLOC_DECLARE(M_XFS); + +#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/ktrace.c b/sys/gnu/fs/xfs/FreeBSD/support/ktrace.c new file mode 100644 index 000000000000..462a66572adc --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/ktrace.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <xfs.h> + +static kmem_zone_t *ktrace_hdr_zone; +static kmem_zone_t *ktrace_ent_zone; +static int ktrace_zentries; +static struct mtx wrap_lock; + +void +ktrace_init(int zentries) +{ + ktrace_zentries = zentries; + + ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), + "ktrace_hdr"); + ASSERT(ktrace_hdr_zone); + + ktrace_ent_zone = kmem_zone_init(ktrace_zentries + * sizeof(ktrace_entry_t), + "ktrace_ent"); + ASSERT(ktrace_ent_zone); + + mtx_init(&wrap_lock, "xfsktr", NULL, MTX_DEF); +} + +void +ktrace_uninit(void) +{ + kmem_cache_destroy(ktrace_hdr_zone); + kmem_cache_destroy(ktrace_ent_zone); + mtx_destroy(&wrap_lock); +} + +/* + * ktrace_alloc() + * + * Allocate a ktrace header and enough buffering for the given + * number of entries. + */ +ktrace_t * +ktrace_alloc(int nentries, int sleep) +{ + ktrace_t *ktp; + ktrace_entry_t *ktep; + + ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); + + if (ktp == (ktrace_t*)NULL) { + /* + * KM_SLEEP callers don't expect failure. + */ + if (sleep & KM_SLEEP) + panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); + + return NULL; + } + + /* + * Special treatment for buffers with the ktrace_zentries entries + */ + if (nentries == ktrace_zentries) { + ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, + sleep); + } else { + ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), + sleep); + } + + if (ktep == NULL) { + /* + * KM_SLEEP callers don't expect failure. 
+ */ + if (sleep & KM_SLEEP) + panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); + + kmem_free(ktp, sizeof(*ktp)); + + return NULL; + } + + spinlock_init(&(ktp->kt_lock), "kt_lock"); + + ktp->kt_entries = ktep; + ktp->kt_nentries = nentries; + ktp->kt_index = 0; + ktp->kt_rollover = 0; + return ktp; +} + + +/* + * ktrace_free() + * + * Free up the ktrace header and buffer. It is up to the caller + * to ensure that no-one is referencing it. + */ +void +ktrace_free(ktrace_t *ktp) +{ + int entries_size; + + if (ktp == (ktrace_t *)NULL) + return; + + spinlock_destroy(&ktp->kt_lock); + + /* + * Special treatment for the Vnode trace buffer. + */ + if (ktp->kt_nentries == ktrace_zentries) { + kmem_zone_free(ktrace_ent_zone, ktp->kt_entries); + } else { + entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); + + kmem_free(ktp->kt_entries, entries_size); + } + + kmem_zone_free(ktrace_hdr_zone, ktp); +} + + +/* + * Enter the given values into the "next" entry in the trace buffer. + * kt_index is always the index of the next entry to be filled. + */ +void +ktrace_enter( + ktrace_t *ktp, + void *val0, + void *val1, + void *val2, + void *val3, + void *val4, + void *val5, + void *val6, + void *val7, + void *val8, + void *val9, + void *val10, + void *val11, + void *val12, + void *val13, + void *val14, + void *val15) +{ + int index; + ktrace_entry_t *ktep; + + ASSERT(ktp != NULL); + + /* + * Grab an entry by pushing the index up to the next one. 
+ */ + mtx_lock(&wrap_lock); + index = ktp->kt_index; + if (++ktp->kt_index == ktp->kt_nentries) + ktp->kt_index = 0; + mtx_unlock(&wrap_lock); + + if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) + ktp->kt_rollover = 1; + + ASSERT((index >= 0) && (index < ktp->kt_nentries)); + + ktep = &(ktp->kt_entries[index]); + + ktep->val[0] = val0; + ktep->val[1] = val1; + ktep->val[2] = val2; + ktep->val[3] = val3; + ktep->val[4] = val4; + ktep->val[5] = val5; + ktep->val[6] = val6; + ktep->val[7] = val7; + ktep->val[8] = val8; + ktep->val[9] = val9; + ktep->val[10] = val10; + ktep->val[11] = val11; + ktep->val[12] = val12; + ktep->val[13] = val13; + ktep->val[14] = val14; + ktep->val[15] = val15; +} + +/* + * Return the number of entries in the trace buffer. + */ +int +ktrace_nentries( + ktrace_t *ktp) +{ + if (ktp == NULL) { + return 0; + } + + return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index); +} + +/* + * ktrace_first() + * + * This is used to find the start of the trace buffer. + * In conjunction with ktrace_next() it can be used to + * iterate through the entire trace buffer. This code does + * not do any locking because it is assumed that it is called + * from the debugger. + * + * The caller must pass in a pointer to a ktrace_snap + * structure in which we will keep some state used to + * iterate through the buffer. This state must not touched + * by any code outside of this module. + */ +ktrace_entry_t * +ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp) +{ + ktrace_entry_t *ktep; + int index; + int nentries; + + if (ktp->kt_rollover) + index = ktp->kt_index; + else + index = 0; + + ktsp->ks_start = index; + ktep = &(ktp->kt_entries[index]); + + nentries = ktrace_nentries(ktp); + index++; + if (index < nentries) { + ktsp->ks_index = index; + } else { + ktsp->ks_index = 0; + if (index > nentries) + ktep = NULL; + } + return ktep; +} + +/* + * ktrace_next() + * + * This is used to iterate through the entries of the given + * trace buffer. 
The caller must pass in the ktrace_snap_t + * structure initialized by ktrace_first(). The return value + * will be either a pointer to the next ktrace_entry or NULL + * if all of the entries have been traversed. + */ +ktrace_entry_t * +ktrace_next( + ktrace_t *ktp, + ktrace_snap_t *ktsp) +{ + int index; + ktrace_entry_t *ktep; + + index = ktsp->ks_index; + if (index == ktsp->ks_start) { + ktep = NULL; + } else { + ktep = &ktp->kt_entries[index]; + } + + index++; + if (index == ktrace_nentries(ktp)) { + ktsp->ks_index = 0; + } else { + ktsp->ks_index = index; + } + + return ktep; +} + +/* + * ktrace_skip() + * + * Skip the next "count" entries and return the entry after that. + * Return NULL if this causes us to iterate past the beginning again. + */ +ktrace_entry_t * +ktrace_skip( + ktrace_t *ktp, + int count, + ktrace_snap_t *ktsp) +{ + int index; + int new_index; + ktrace_entry_t *ktep; + int nentries = ktrace_nentries(ktp); + + index = ktsp->ks_index; + new_index = index + count; + while (new_index >= nentries) { + new_index -= nentries; + } + if (index == ktsp->ks_start) { + /* + * We've iterated around to the start, so we're done. + */ + ktep = NULL; + } else if ((new_index < index) && (index < ktsp->ks_index)) { + /* + * We've skipped past the start again, so we're done. + */ + ktep = NULL; + ktsp->ks_index = ktsp->ks_start; + } else { + ktep = &(ktp->kt_entries[new_index]); + new_index++; + if (new_index == nentries) { + ktsp->ks_index = 0; + } else { + ktsp->ks_index = new_index; + } + } + return ktep; +} diff --git a/sys/gnu/fs/xfs/FreeBSD/support/ktrace.h b/sys/gnu/fs/xfs/FreeBSD/support/ktrace.h new file mode 100644 index 000000000000..b566ef8fa756 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/ktrace.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_KTRACE_H__ +#define __XFS_SUPPORT_KTRACE_H__ + +#include <support/spin.h> + +/* + * Trace buffer entry structure. + */ +typedef struct ktrace_entry { + void *val[16]; +} ktrace_entry_t; + +/* + * Trace buffer header structure. + */ +typedef struct ktrace { + lock_t kt_lock; /* mutex to guard counters */ + int kt_nentries; /* number of entries in trace buf */ + int kt_index; /* current index in entries */ + int kt_rollover; + ktrace_entry_t *kt_entries; /* buffer of entries */ +} ktrace_t; + +/* + * Trace buffer snapshot structure. 
+ */ +typedef struct ktrace_snap { + int ks_start; /* kt_index at time of snap */ + int ks_index; /* current index */ +} ktrace_snap_t; + + +#ifdef CONFIG_XFS_TRACE + +extern void ktrace_init(int zentries); +extern void ktrace_uninit(void); + +extern ktrace_t *ktrace_alloc(int, int); +extern void ktrace_free(ktrace_t *); + +extern void ktrace_enter( + ktrace_t *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *, + void *); + +extern ktrace_entry_t *ktrace_first(ktrace_t *, ktrace_snap_t *); +extern int ktrace_nentries(ktrace_t *); +extern ktrace_entry_t *ktrace_next(ktrace_t *, ktrace_snap_t *); +extern ktrace_entry_t *ktrace_skip(ktrace_t *, int, ktrace_snap_t *); + +#else +#define ktrace_init(x) do { } while (0) +#define ktrace_uninit() do { } while (0) +#endif /* CONFIG_XFS_TRACE */ + +#endif /* __XFS_SUPPORT_KTRACE_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/move.h b/sys/gnu/fs/xfs/FreeBSD/support/move.h new file mode 100644 index 000000000000..856ec03f5ae5 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/move.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef __XFS_SUPPORT_MOVE_H__ +#define __XFS_SUPPORT_MOVE_H__ + +#include <sys/uio.h> + +typedef struct iovec iovec_t; +typedef struct uio uio_t; + +static __inline__ int +uio_read(void *buf, int howmuch, struct uio *uiop) +{ + uiop->uio_rw = UIO_READ; + return uiomove(buf,howmuch,uiop); +} + +#endif /* __XFS_SUPPORT_MOVE_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c new file mode 100644 index 000000000000..950303938a07 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c @@ -0,0 +1,49 @@ +#include <sys/param.h> +#include <support/mrlock.h> + +void +_sx_xfs_destroy(struct sx *sx) +{ + if (sx->sx_cnt == -1) + sx_xunlock(sx); + sx_destroy(sx); +} + +void +_sx_xfs_lock(struct sx *sx, int type, const char *file, int line) +{ + if (type == MR_ACCESS) + _sx_slock(sx, file, line); + else if (type == MR_UPDATE) + _sx_sunlock(sx, file, line); + else + panic("Invalid lock type passed"); +} + + +void +_sx_xfs_unlock(struct sx *sx, const char *file, int line) +{ + if (_sx_xfs_xowned(sx)) + _sx_xunlock(sx, file, line); + else if (_sx_xfs_sowned(sx)) + _sx_sunlock(sx, file, line); + else + panic("lock is not locked"); +} + +int +ismrlocked(mrlock_t *mrp, int type) +{ + if (type == MR_ACCESS) + return _sx_xfs_sowned(mrp); /* Read lock */ + else if (type == MR_UPDATE) + return _sx_xfs_xowned(mrp); /* 
Write lock */ + else if (type == (MR_UPDATE | MR_ACCESS)) + return _sx_xfs_sowned(mrp) || + _sx_xfs_xowned(mrp); /* Any type of lock held */ + return (mrp->sx_shrd_wcnt > 0 || mrp->sx_excl_wcnt > 0); +} + + + diff --git a/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h new file mode 100644 index 000000000000..4e82d4199118 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h @@ -0,0 +1,65 @@ +#ifndef __XFS_SUPPORT_MRLOCK_H__ +#define __XFS_SUPPORT_MRLOCK_H__ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> + +#include <support/debug.h> + +/* + * Implement mrlocks on FreeBSD that work for XFS. + * Use FreeBSD sx lock and add necessary functions + * if additional functionality is requested + */ +typedef struct sx mrlock_t; + +#define MR_ACCESS 1 +#define MR_UPDATE 2 + +/* + * Compatibility defines, not really used + */ +#define MRLOCK_BARRIER 0x1 +#define MRLOCK_ALLOW_EQUAL_PRI 0x8 + +/* + * mraccessf/mrupdatef take flags to be passed in while sleeping; + * only PLTWAIT is currently supported. 
+ */ +#define mrinit(lock, name) sx_init(lock, name) +#define mrlock_init(lock, type, name, seq) sx_init(lock, name) +#define mrfree(lock) _sx_xfs_destroy(lock) +#define mraccessf(lock, f) sx_slock(lock) +#define mrupdatef(lock, f) sx_xlock(lock) +#define mraccunlock(lock) sx_sunlock(lock) +#define mrtryaccess(lock) sx_try_slock(lock) +#define mrtryupdate(lock) sx_try_xlock(lock) +#define mraccess(mrp) mraccessf(mrp, 0) +#define mrupdate(mrp) mrupdatef(mrp, 0) +#define mrislocked_access(lock) _sx_xfs_xowned(lock) +#define mrislocked_update(lock) _sx_xfs_sowned(lock) +#define mrtrypromote(lock) sx_try_upgrade(lock) +#define mrdemote(lock) sx_downgrade(lock) + +int ismrlocked(mrlock_t *, int); +void _sx_xfs_lock(struct sx *sx, int type, const char *file, int line); +void _sx_xfs_unlock(struct sx *sx, const char *file, int line); +void _sx_xfs_destroy(struct sx *sx); +#define _sx_xfs_xowned(lock) ((lock)->sx_cnt < 0) +#define _sx_xfs_sowned(lock) ((lock)->sx_cnt > 0) + +/* + * Functions, not implemented in FreeBSD + */ +#define mrunlock(lock) \ + _sx_xfs_unlock(lock, __FILE__, __LINE__) + +#define mrlock(lock, type, flags) \ + _sx_xfs_lock(lock, type, __FILE__, __LINE__) + + + +#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/mutex.h b/sys/gnu/fs/xfs/FreeBSD/support/mutex.h new file mode 100644 index 000000000000..d9b89b3adcf3 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/mutex.h @@ -0,0 +1,29 @@ +#ifndef __XFS_SUPPORT_MUTEX_H__ +#define __XFS_SUPPORT_MUTEX_H__ + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> + +/* + * Map the mutex'es from IRIX to FreeBSD. Irix holds mutexes across + * sleeps, so on FreeBSD we have a choice of sema, sx or lockmgr + * to use as a underlining implemenation. Go with sx always locked + * in exclusive mode for now as it gets all the benefits of witness + * checking. 
+ */ +typedef struct sx mutex_t; + +#define mutex_init(lock, type, name) sx_init(lock, name) +#define mutex_lock(lock, num) sx_xlock(lock) +#define mutex_trylock(lock) sx_try_xlock(lock) +#define mutex_unlock(lock) sx_xunlock(lock) +#define mutex_destroy(lock) sx_destroy(lock) + +/* + * Type for mutex_init() + */ +#define MUTEX_DEFAULT 0 + +#endif /* __XFS_SUPPORT_MUTEX_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/rwlock.h b/sys/gnu/fs/xfs/FreeBSD/support/rwlock.h new file mode 100644 index 000000000000..05f489e1e4f6 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/rwlock.h @@ -0,0 +1,22 @@ +#ifndef __XFS_SUPPORT_RWLOCK_H__ +#define __XFS_SUPPORT_RWLOCK_H__ + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> + +typedef struct sx rwlock_t; +typedef int wait_queue_head_t; + +#define rwlock_init(lock) sx_init(lock, "rwlock") +#define rwlock_destroy(lock) sx_destroy(lock) +#define read_lock(lock) sx_slock(lock) +#define read_unlock(lock) sx_sunlock(lock) +#define write_lock(lock) sx_xlock(lock) +#define write_unlock(lock) sx_xunlock(lock) +#define rwlock_trypromote(lock) sx_try_upgrade(lock) +#define rwlock_demote(lock) sx_downgrade(lock) + + +#endif /* __XFS_SUPPORT_RWLOCK_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/rwsem.h b/sys/gnu/fs/xfs/FreeBSD/support/rwsem.h new file mode 100644 index 000000000000..bb972327bb78 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/rwsem.h @@ -0,0 +1,21 @@ +#ifndef __XFS_SUPPORT_RWSEM_H__ +#define __XFS_SUPPORT_RWSEM_H__ + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> + +#define rw_semaphore sx + +#define init_rwsem(sem) sx_init(sem, "rwsem") +#define free_rwsem(sem) sx_destroy(sem) +#define down_read(sem) sx_slock(sem) +#define down_read_trylock(sem) sx_try_slock(sem) +#define down_write(sem) sx_xlock(sem) +#define down_write_trylock(sem) sx_try_xlock(sem) +#define up_read(sem) sx_sunlock(sem) +#define up_write(sem) 
sx_xunlock(sem) +#define downgrade_write(sem) sx_downgrade(sem) + +#endif /* __XFS_SUPPORT_RWSEM_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/sema.h b/sys/gnu/fs/xfs/FreeBSD/support/sema.h new file mode 100644 index 000000000000..db7795b93d2a --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/sema.h @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __XFS_SUPPORT_SEMA_H__ +#define __XFS_SUPPORT_SEMA_H__ + +#include <sys/sema.h> + +/* + * sema_t structure just maps to struct sema in FreeBSD kernel. + */ + +typedef struct sema sema_t; + +#define init_sema(sp, val, c, d) sema_init(sp, val, c) +#define initnsema(sp, val, name) sema_init(sp, val, name) +#define psema(sp, b) sema_wait(sp) +#define vsema(sp) sema_post(sp) +#define valusema(sp) sema_value(sp) +#define freesema(sp) sema_destroy(sp) +#define cpsema(sp) sema_trywait(sp) + +#endif /* __XFS_SUPPORT_SEMA_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/spin.h b/sys/gnu/fs/xfs/FreeBSD/support/spin.h new file mode 100644 index 000000000000..2b09be477959 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/spin.h @@ -0,0 +1,42 @@ +#ifndef __XFS_SUPPORT_SPIN_H__ +#define __XFS_SUPPORT_SPIN_H__ + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#define SPLDECL(s) register_t s + +/* + * Map the spinlocks from IRIX to FreeBSD + */ +#define spinlock_init(lock, name) mtx_init(lock, name, NULL, MTX_DEF) +#define spinlock_destroy(lock) mtx_destroy(lock) + +/* + * Map lock_t from IRIX to FreeBSD mutexes + */ +typedef struct mtx lock_t; + +#define nested_spinunlock(lock) mtx_unlock(lock) +#define nested_spinlock(lock) mtx_lock(lock) +#define nested_spintrylock(lock) mtx_trylock(lock) + +#define spin_lock(lock) mtx_lock(lock) +#define spin_unlock(lock) mtx_unlock(lock) + +#if 
LOCK_DEBUG > 0 +#define mutex_spinlock(lock) (spin_lock(lock),0) +#else +static __inline register_t +mutex_spinlock(lock_t *lock) { mtx_lock(lock); return 0; } +#endif + +#define mutex_spinunlock(lock,s) \ + do { \ + spin_unlock(lock); \ + if (&s) {} \ + } while (0) + +#endif /* __XFS_SUPPORT_SPIN_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/support.h b/sys/gnu/fs/xfs/FreeBSD/support/support.h new file mode 100644 index 000000000000..d7804fa8b265 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/support.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_H__ +#define __XFS_SUPPORT_H__ + +#include <support/types.h> +#include <support/arch.h> +#include <support/kmem.h> +#include <support/mrlock.h> +#include <support/spin.h> +#include <support/sv.h> +#include <support/ktrace.h> +#include <support/mutex.h> +#include <support/sema.h> +#include <support/atomic.h> +#include <support/debug.h> +#include <support/uuid.h> +#include <support/time.h> + +#endif /* __XFS_SUPPORT_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/sv.h b/sys/gnu/fs/xfs/FreeBSD/support/sv.h new file mode 100644 index 000000000000..fa37129c6037 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/sv.h @@ -0,0 +1,34 @@ +#ifndef __XFS_SUPPORT_SV_H__ +#define __XFS_SUPPORT_SV_H__ + +#include <sys/condvar.h> + +/* + * Synchronisation variables + * + * parameters "pri", "svf" and "rts" are not (yet?) implemented + * + */ + +typedef struct cv sv_t; + +#define init_sv(sv,type,name,flag) \ + cv_init(sv, name) +#define sv_init(sv,flag,name) \ + cv_init(sv, name) +#define sv_wait(sv, pri, lock, spl) \ + cv_wait_unlock(sv, lock) +#define sv_signal(sv) \ + cv_signal(sv) +#define sv_broadcast(sv) \ + cv_broadcast(sv) +#define sv_destroy(sv) \ + cv_destroy(sv) + +#define SV_FIFO 0x0 /* sv_t is FIFO type */ +#define SV_LIFO 0x2 /* sv_t is LIFO type */ +#define SV_PRIO 0x4 /* sv_t is PRIO type */ +#define SV_KEYED 0x6 /* sv_t is KEYED type */ +#define SV_DEFAULT SV_FIFO + +#endif /* __XFS_SUPPORT_SV_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/time.h b/sys/gnu/fs/xfs/FreeBSD/support/time.h new file mode 100644 index 000000000000..9b3a974c2431 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/time.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_TIME_H__ +#define __XFS_SUPPORT_TIME_H__ + +#define delay(ticks) DELAY(ticks) + +#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/support/uuid.c b/sys/gnu/fs/xfs/FreeBSD/support/uuid.c new file mode 100644 index 000000000000..55344c01419c --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/uuid.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <xfs_config.h> +#include <xfs_compat.h> +#include <xfs_types.h> +#include <xfs_arch.h> + +#include <support/uuid.h> +#include <support/kmem.h> +#include <support/debug.h> +#include <support/mutex.h> + +static mutex_t uuid_monitor; +static int uuid_table_size; +static uuid_t *uuid_table; + +void +uuid_init(void) +{ + mutex_init(&uuid_monitor, MUTEX_DEFAULT, "uuid_monitor"); +} + +void +uuid_cleanup(void) +{ + mutex_destroy(&uuid_monitor); +} + +/* + * uuid_getnodeuniq - obtain the node unique fields of a UUID. + * + * This is not in any way a standard or condoned UUID function; + * it is just something that's needed for user-level file handles. 
+ */ +void +uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) +{ + char *uu = (char *)uuid; + + /* on IRIX, this function assumes big-endian fields within + * the uuid, so we use INT_GET to get the same result on + * little-endian systems + */ + + fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) + + INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT); + fsid[1] = INT_GET(*(u_int32_t*)(uu ), ARCH_CONVERT); +} + +void +uuid_create_nil(uuid_t *uuid) +{ + memset(uuid, 0, sizeof(*uuid)); +} + +int +uuid_is_nil(uuid_t *uuid) +{ + int i; + char *cp = (char *)uuid; + + if (uuid == NULL) + return 0; + /* implied check of version number here... */ + for (i = 0; i < sizeof *uuid; i++) + if (*cp++) return 0; /* not nil */ + return 1; /* is nil */ +} + +int +uuid_equal(uuid_t *uuid1, uuid_t *uuid2) +{ + return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; +} + +/* + * Given a 128-bit uuid, return a 64-bit value by adding the top and bottom + * 64-bit words. NOTE: This function can not be changed EVER. Although + * brain-dead, some applications depend on this 64-bit value remaining + * persistent. Specifically, DMI vendors store the value as a persistent + * filehandle. 
+ */ +__uint64_t +uuid_hash64(uuid_t *uuid) +{ + __uint64_t *sp = (__uint64_t *)uuid; + + return sp[0] + sp[1]; +} + +int +uuid_table_insert(uuid_t *uuid) +{ + int i, hole; + + mutex_lock(&uuid_monitor, PVFS); + for (i = 0, hole = -1; i < uuid_table_size; i++) { + if (uuid_is_nil(&uuid_table[i])) { + hole = i; + continue; + } + if (uuid_equal(uuid, &uuid_table[i])) { + mutex_unlock(&uuid_monitor); + return 0; + } + } + if (hole < 0) { + uuid_table = kmem_realloc(uuid_table, + (uuid_table_size + 1) * sizeof(*uuid_table), + uuid_table_size * sizeof(*uuid_table), + KM_SLEEP); + hole = uuid_table_size++; + } + uuid_table[hole] = *uuid; + mutex_unlock(&uuid_monitor); + return 1; +} + +void +uuid_table_remove(uuid_t *uuid) +{ + int i; + + mutex_lock(&uuid_monitor, PVFS); + for (i = 0; i < uuid_table_size; i++) { + if (uuid_is_nil(&uuid_table[i])) + continue; + if (!uuid_equal(uuid, &uuid_table[i])) + continue; + uuid_create_nil(&uuid_table[i]); + break; + } + ASSERT(i < uuid_table_size); + mutex_unlock(&uuid_monitor); +} diff --git a/sys/gnu/fs/xfs/FreeBSD/support/uuid.h b/sys/gnu/fs/xfs/FreeBSD/support/uuid.h new file mode 100644 index 000000000000..d8f389ae5a22 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/support/uuid.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_UUID_H__ +#define __XFS_SUPPORT_UUID_H__ + +void uuid_init(void); +void uuid_cleanup(void); +void uuid_create_nil(uuid_t *uuid); +int uuid_is_nil(uuid_t *uuid); +int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); +void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); +__uint64_t uuid_hash64(uuid_t *uuid); +int uuid_table_insert(uuid_t *uuid); +void uuid_table_remove(uuid_t *uuid); + +#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c b/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c new file mode 100644 index 000000000000..ac300dbed01c --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c @@ -0,0 +1,188 @@ +/* + * + * + */ +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_clnt.h" +#include "xfs_mountops.h" +#include <geom/geom.h> + +xfs_buf_t * +xfs_buf_read_flags(xfs_buftarg_t *target, xfs_daddr_t blkno, size_t len, int flags) +{ + struct buf *bp; + struct g_consumer *cp; + + KASSERT((target != NULL), ("got NULL buftarg_t")); + + cp = target->specvp->v_bufobj.bo_private; + if (cp == NULL) { + bp = NULL; + goto done; + } + + /* This restriction is in GEOM's g_io_request() */ + if 
((BBTOB(len) % cp->provider->sectorsize) != 0) { + printf("Read request %ld does not align with sector size: %d\n", + (long)BBTOB(len), cp->provider->sectorsize); + bp = NULL; + goto done; + } + + if (bread(target->specvp, blkno, BBTOB(len), NOCRED, &bp)) { + printf("bread failed specvp %p blkno %qd BBTOB(len) %ld\n", + target->specvp, blkno, (long)BBTOB(len)); + bp = NULL; + goto done; + } + if (flags & B_MANAGED) + bp->b_flags |= B_MANAGED; + xfs_buf_set_target(bp, target); + +done: + return (bp); +} + +xfs_buf_t * +xfs_buf_get_flags(xfs_buftarg_t *target, xfs_daddr_t blkno, size_t len, int flags) +{ + struct buf *bp = NULL; + KASSERT((target != NULL), ("got NULL buftarg_t")); + bp = getblk(target->specvp, blkno, BBTOB(len), 0, 0, 0); + if (bp != NULL) + xfs_buf_set_target(bp, target); + return (bp); +} + +xfs_buf_t* +xfs_buf_get_empty(size_t size, xfs_buftarg_t *target) +{ + struct buf *bp; + + bp = geteblk(0); + if (bp != NULL) { + bp->b_bufsize = size; + bp->b_bcount = size; + + KASSERT(BUF_REFCNT(bp) == 1, + ("xfs_buf_get_empty: bp %p not locked",bp)); + + xfs_buf_set_target(bp, target); + } + return (bp); +} + +/* + * Get an anonymous buffer of len bytes via geteblk() and bind it to target. + * Returns NULL when len >= MAXPHYS. + */ +xfs_buf_t* +xfs_buf_get_noaddr(size_t len, xfs_buftarg_t *target) +{ + struct buf *bp; + if (len >= MAXPHYS) + return (NULL); + + bp = geteblk(len); + if (bp != NULL) { + /* report this function's own name so a failed assertion is attributed correctly */ + KASSERT(BUF_REFCNT(bp) == 1, + ("xfs_buf_get_noaddr: bp %p not locked",bp)); + + xfs_buf_set_target(bp, target); + } + + return (bp); +} + +void +xfs_buf_free(xfs_buf_t *bp) +{ + bp->b_flags |= B_INVAL; + BUF_KERNPROC(bp); /* ugly hack #1 */ + if (bp->b_kvasize == 0) { + bp->b_saveaddr = bp->b_kvabase; /* ugly hack #2 */ + bp->b_data = bp->b_saveaddr; + bp->b_bcount = 0; + bp->b_bufsize = 0; + } + brelse(bp); +} + +void +xfs_baread(xfs_buftarg_t *targp, xfs_daddr_t ioff, size_t isize) +{ + daddr_t rablkno; + int rabsize; + + rablkno = ioff; + rabsize = BBTOB(isize); + breada(targp->specvp, &rablkno, &rabsize, 1, NOCRED); +} + +void +xfs_buf_set_target(xfs_buf_t *bp, xfs_buftarg_t 
*targ) +{ + bp->b_bufobj = &targ->specvp->v_bufobj; + bp->b_caller1 = targ; +} + +xfs_buftarg_t * +xfs_buf_get_target(xfs_buf_t *bp) +{ + return (xfs_buftarg_t *)bp->b_caller1; +} + +int +XFS_bwrite(xfs_buf_t *bp) +{ + int error; + if (bp->b_vp == NULL) { + error = xfs_buf_iorequest(bp); + + if ((bp->b_flags & B_ASYNC) == 0) { + error = bufwait(bp); + if (BUF_REFCNT(bp) > 1) + BUF_UNLOCK(bp); + else + brelse(bp); + } + return (error); + } + error = bwrite(bp); + return (error); +} + +void +xfs_bpin(xfs_buf_t *bp) +{ + printf("xfs_bpin(%p)\n", bp); + bpin(bp); +} + +void +xfs_bunpin(xfs_buf_t *bp) +{ + printf("xfs_bunpin(%p)\n", bp); + bunpin(bp); +} + +int +xfs_ispin(xfs_buf_t *bp) +{ + printf("xfs_ispin(%p)\n", bp); + return bp->b_pin_count; +} + +void +xfs_bwait_unpin(xfs_buf_t *bp) +{ + printf("xfs_bwait_unpin(%p)\n", bp); + bunpin_wait(bp); +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_buf.h b/sys/gnu/fs/xfs/FreeBSD/xfs_buf.h new file mode 100644 index 000000000000..54a7fc656e45 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_buf.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BUF_H__ +#define __XFS_BUF_H__ + +#include <sys/bio.h> +#include <sys/buf.h> + +/* XXX: move this into buf.h */ +#ifndef B_MANAGED +#define B_MANAGED B_08000000 +#endif + +struct xfs_buf; +struct xfs_mount; +struct vnode; + +typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); /* call-back function on I/O completion */ +typedef void (*xfs_buf_relse_t)(struct xfs_buf *); /* call-back function on I/O completion */ +typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); + +typedef struct xfs_buftarg { + struct cdev *dev; + struct vnode *specvp; +} xfs_buftarg_t; + +typedef struct buf xfs_buf_t; +#define xfs_buf buf + +/* These are just for xfs_syncsub... 
it sets an internal variable + * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t + */ + +#define XFS_B_ASYNC B_ASYNC +#define XFS_B_DELWRI B_DELWRI +#define XFS_B_READ BIO_READ +#define XFS_B_WRITE BIO_WRITE + +#define XFS_B_STALE B_INVAL +#define XFS_BUF_LOCK 0 +#define XFS_BUF_TRYLOCK 0 +#define XFS_BUF_MAPPED 0 +#define BUF_BUSY 0 + + /* debugging routines might need this */ +#define XFS_BUF_BFLAGS(x) ((x)->b_flags) +#define XFS_BUF_ZEROFLAGS(x) ((x)->b_flags = 0) +#define XFS_BUF_STALE(x) ((x)->b_flags |= XFS_B_STALE) +#define XFS_BUF_UNSTALE(x) ((x)->b_flags &= ~XFS_B_STALE) +#define XFS_BUF_ISSTALE(x) ((x)->b_flags & XFS_B_STALE) +#define XFS_BUF_SUPER_STALE(x) {(x)->b_flags |= XFS_B_STALE;\ + (x)->b_flags &= ~(XFS_B_DELWRI|B_CACHE);} + +#define XFS_BUF_MANAGE B_MANAGED +#define XFS_BUF_UNMANAGE(x) ((x)->b_flags &= ~B_MANAGED) + +#define XFS_BUF_DELAYWRITE(x) ((x)->b_flags |= XFS_B_DELWRI) +#define XFS_BUF_UNDELAYWRITE(x) ((x)->b_flags &= ~XFS_B_DELWRI) +#define XFS_BUF_ISDELAYWRITE(x) ((x)->b_flags & XFS_B_DELWRI) + +#define XFS_BUF_ERROR(x,no) xfs_buf_set_error((x), (no)) +#define XFS_BUF_GETERROR(x) xfs_buf_get_error(x) +#define XFS_BUF_ISERROR(x) (((x)->b_ioflags & BIO_ERROR) != 0) + +void static __inline__ +xfs_buf_set_error(struct buf *bp, int err) +{ + bp->b_ioflags |= BIO_ERROR; + bp->b_error = err; +} + +int static __inline__ +xfs_buf_get_error(struct buf *bp) +{ + return XFS_BUF_ISERROR(bp) ? (bp->b_error ? 
bp->b_error : EIO) : 0; +} + +#define XFS_BUF_DONE(x) ((x)->b_flags |= B_CACHE) +#define XFS_BUF_UNDONE(x) ((x)->b_flags &= ~B_CACHE) +#define XFS_BUF_ISDONE(x) ((x)->b_flags & B_CACHE) + +#define XFS_BUF_BUSY(x) ((x)->b_flags |= BUF_BUSY) +#define XFS_BUF_UNBUSY(x) ((x)->b_flags &= ~BUF_BUSY) +#define XFS_BUF_ISBUSY(x) (1) + +#define XFS_BUF_ASYNC(x) ((x)->b_flags |= B_ASYNC) +#define XFS_BUF_UNASYNC(x) ((x)->b_flags &= ~B_ASYNC) +#define XFS_BUF_ISASYNC(x) ((x)->b_flags & B_ASYNC) + +#define XFS_BUF_FLUSH(x) ((x)->b_flags |= B_00800000) +#define XFS_BUF_UNFLUSH(x) ((x)->b_flags &= ~B_00800000) +#define XFS_BUF_ISFLUSH(x) ((x)->b_flags & B_00800000) + +#define XFS_BUF_SHUT(x) printf("XFS_BUF_SHUT not implemented yet\n") +#define XFS_BUF_UNSHUT(x) printf("XFS_BUF_UNSHUT not implemented yet\n") +#define XFS_BUF_ISSHUT(x) (0) + +#define XFS_BUF_HOLD(x) ((void)0) +#define XFS_BUF_UNHOLD(x) ((void)0) +#define XFS_BUF_ISHOLD(x) BUF_REFCNT(x) + +#define XFS_BUF_READ(x) ((x)->b_iocmd = BIO_READ) +#define XFS_BUF_UNREAD(x) ((x)->b_iocmd = 0) +#define XFS_BUF_ISREAD(x) ((x)->b_iocmd == BIO_READ) + +#define XFS_BUF_WRITE(x) ((x)->b_iocmd = BIO_WRITE) +#define XFS_BUF_UNWRITE(x) ((x)->b_iocmd = 0) +#define XFS_BUF_ISWRITE(x) ((x)->b_iocmd == BIO_WRITE) + +#define XFS_BUF_ISUNINITIAL(x) (0) +#define XFS_BUF_UNUNINITIAL(x) (0) + +#define XFS_BUF_IODONE_FUNC(x) (x)->b_iodone +#define XFS_BUF_SET_IODONE_FUNC(x, f) (x)->b_iodone = (f) +#define XFS_BUF_CLR_IODONE_FUNC(x) (x)->b_iodone = NULL + +#define XFS_BUF_SET_BDSTRAT_FUNC(x, f) do { if(f != NULL) {} } while(0) +#define XFS_BUF_CLR_BDSTRAT_FUNC(x) ((void)0) + +#define XFS_BUF_BP_ISMAPPED(bp) (1) + +#define XFS_BUF_FSPRIVATE(buf, type) \ + ((type)(buf)->b_fsprivate1) +#define XFS_BUF_SET_FSPRIVATE(buf, value) \ + (buf)->b_fsprivate1 = (void *)(value) +#define XFS_BUF_FSPRIVATE2(buf, type) \ + ((type)(buf)->b_fsprivate2) +#define XFS_BUF_SET_FSPRIVATE2(buf, value) \ + (buf)->b_fsprivate2 = (void *)(value) +#define 
XFS_BUF_FSPRIVATE3(buf, type) \ + ((type)(buf)->b_fsprivate3) +#define XFS_BUF_SET_FSPRIVATE3(buf, value) \ + (buf)->b_fsprivate3 = (void *)(value) +#define XFS_BUF_SET_START(buf) \ + printf("XFS_BUF_SET_START: %s:%d\n", __FILE__, __LINE__) + +#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ + do { \ + printf("XFS_BUF_SET_BRELSE_FUNC: %s:%d\n", \ + __FILE__, __LINE__); \ + if (value != NULL ) {} \ + } while(0) + +#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_data) + +static __inline xfs_caddr_t +xfs_buf_offset(xfs_buf_t *bp, size_t offset) +{ + return XFS_BUF_PTR(bp) + offset; +} + +#define XFS_BUF_SET_PTR(bp, val, count) \ + do { \ + (bp)->b_data = (val); \ + (bp)->b_bcount = (count); \ + } while(0) + +#define XFS_BUF_ADDR(bp) ((bp)->b_blkno) +#define XFS_BUF_SET_ADDR(bp, blk) \ + ((bp)->b_blkno = blk) +#define XFS_BUF_OFFSET(bp) ((bp)->b_offset) +#define XFS_BUF_SET_OFFSET(bp, off) \ + ((bp)->b_offset = off) +#define XFS_BUF_COUNT(bp) ((bp)->b_bcount) +#define XFS_BUF_SET_COUNT(bp, cnt) \ + ((bp)->b_bcount = cnt) +#define XFS_BUF_SIZE(bp) ((bp)->b_bufsize) +#define XFS_BUF_SET_SIZE(bp, cnt) \ + ((bp)->b_bufsize = cnt) +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define XFS_BUF_VALUSEMA(bp) (BUF_REFCNT(bp)? 
0 : 1) +#define XFS_BUF_CPSEMA(bp) \ + (BUF_LOCK(bp, LK_EXCLUSIVE|LK_CANRECURSE | LK_SLEEPFAIL, NULL) == 0) +#define XFS_BUF_PSEMA(bp,x) BUF_LOCK(bp, LK_EXCLUSIVE|LK_CANRECURSE, NULL) +#define XFS_BUF_VSEMA(bp) BUF_UNLOCK(bp) +#define XFS_BUF_V_IODONESEMA(bp) bdone(bp) + +/* setup the buffer target from a buftarg structure */ +#define XFS_BUF_SET_TARGET(bp, target) \ + xfs_buf_set_target(bp, target) + +void xfs_buf_set_target(xfs_buf_t *, xfs_buftarg_t *); +xfs_buftarg_t *xfs_buf_get_target(xfs_buf_t *); + +/* return the dev_t being used */ +#define XFS_BUF_TARGET(bp) xfs_buf_get_target(bp) +#define XFS_BUFTARG_NAME(targp) devtoname((targp)->dev) + +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define XFS_BUF_ISPINNED(bp) xfs_ispin(bp) + +xfs_buf_t * +xfs_buf_read_flags(xfs_buftarg_t *, xfs_daddr_t, size_t, int); + +#define xfs_buf_read(target, blkno, len, flags) \ + xfs_buf_read_flags(target, blkno, len, \ + XFS_BUF_LOCK | XFS_BUF_MAPPED) + +xfs_buf_t * +xfs_buf_get_flags(xfs_buftarg_t *, xfs_daddr_t, size_t, int); + +#define xfs_buf_get(target, blkno, len, flags) \ + xfs_buf_get_flags(target, blkno, len, \ + XFS_BUF_LOCK | XFS_BUF_MAPPED) + +#define xfs_bdwrite(mp, bp) bdwrite(bp) +/* + { ((bp)->b_vp == NULL) ? (bp)->b_bdstrat = xfs_bdstrat_cb: 0; \ + (bp)->b_fsprivate3 = (mp); bdwrite(bp);} +*/ +#define xfs_bawrite(mp, bp) bawrite(bp) +/* + { ((bp)->b_vp == NULL) ? 
(bp)->b_bdstrat = xfs_bdstrat_cb: 0; \ + (bp)->b_fsprivate3 = (mp); bawrite(bp);} +*/ + +#define xfs_buf_relse(bp) brelse(bp) +#define xfs_bp_mapin(bp) bp_mapin(bp) +#define xfs_xfsd_list_evict(x) _xfs_xfsd_list_evict(x) +/* + * xfs_buftrace(x, y): x is used as the string tag and y as the buffer, + * matching the "%s bp %p flags %X" format; three arguments need CTR3. + */ +#define xfs_buftrace(x,y) CTR3(KTR_BUF, "%s bp %p flags %X", (x), (y), (y)->b_flags) +#define xfs_biodone(bp) bufdone_finish(bp) + +/* expands to a plain expression (no trailing ';') so it can be used in conditions and assignments */ +#define xfs_incore(xfs_buftarg,blkno,len,lockit) \ + incore(&(xfs_buftarg)->specvp->v_bufobj, (blkno)) + +#define xfs_biomove(pb, off, len, data, rw) \ + panic("%s:%d: xfs_biomove NI", __FILE__, __LINE__) + +#define xfs_biozero(pb, off, len) \ + panic("%s:%d: xfs_biozero NI", __FILE__, __LINE__) + +/* already a function xfs_bwrite... fix this */ +#define XFS_bdwrite(bp) bdwrite(bp) +#define xfs_iowait(bp) bufwait(bp) + +#define xfs_binval(buftarg) printf("binval(buftarg.dev) NI\n") +#define XFS_bflush(buftarg) printf("bflush(buftarg.dev) NI\n") + +#define XFS_bdstrat(bp) printf("XFS_bdstrat NI\n") + +#define xfs_incore_relse(buftarg,delwri_only,wait) \ + printf("incore_relse(buftarg.dev,delwri_only,wait) NI\n") + +#define xfs_incore_match(buftarg,blkno,len,field,value) \ + printf("incore_match(buftarg.dev,blkno,len,field,value) NI \n") + +void xfs_baread(xfs_buftarg_t *targp, xfs_daddr_t ioff, size_t isize); + +extern void pdflush(struct vnode *, uint64_t); +#define XFS_pdflush(vnode,flags) \ + pdflush(vnode,flags) + +struct xfs_mount; + +int XFS_bwrite(xfs_buf_t *bp); +xfs_buf_t* xfs_buf_get_empty(size_t, xfs_buftarg_t *targ); +xfs_buf_t* xfs_buf_get_noaddr(size_t, xfs_buftarg_t *targ); +void xfs_buf_free(xfs_buf_t *); +int xfs_buf_iorequest(struct xfs_buf *bp); + +void XFS_freerbuf(xfs_buf_t *bp); +void XFS_nfreerbuf(xfs_buf_t *bp); + +void xfs_bpin(xfs_buf_t *bp); +void xfs_bunpin(xfs_buf_t *bp); +int xfs_ispin(xfs_buf_t *bp); +void xfs_bwait_unpin(xfs_buf_t *bp); + +#endif diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_compat.h b/sys/gnu/fs/xfs/FreeBSD/xfs_compat.h new file mode 100644 index 000000000000..d89e38b1593c --- 
/dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_compat.h @@ -0,0 +1,165 @@ +#ifndef __XFS_COMPAT_H__ +#define __XFS_COMPAT_H__ + +#include <sys/param.h> +#include <sys/libkern.h> +#include <sys/limits.h> +#include <sys/uuid.h> +#include <sys/conf.h> +#include <sys/sbuf.h> +#include <sys/stat.h> +#include <sys/ioccom.h> +#include <sys/fcntl.h> +#include <sys/dirent.h> +#include <sys/ktr.h> +#include <sys/kdb.h> + +#ifdef _KERNEL +#define __KERNEL__ +#endif + +#define printk printf + +#define MAJOR(x) major(x) +#define MINOR(x) minor(x) + +/* + * SYSV compatibility types missing in FreeBSD. + */ +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; + +/* + * Additional type declarations for XFS. + */ +typedef signed char __s8; +typedef unsigned char __u8; +typedef signed short int __s16; +typedef unsigned short int __u16; +typedef signed int __s32; +typedef unsigned int __u32; +typedef signed long long int __s64; +typedef unsigned long long int __u64; + +/* + * Linux types with direct FreeBSD counterparts + */ +typedef off_t loff_t; +typedef struct timespec timespec_t; +typedef struct uuid uuid_t; +typedef struct fid fid_t; +typedef dev_t os_dev_t; + +/* + * Linux block devices are device vnodes in FreeBSD. + */ +#define block_device vnode + +/* + * Get the current CPU ID. + */ +#define smp_processor_id() PCPU_GET(cpuid) + +/* + * FreeBSD does not have BITS_PER_LONG defined. + */ +#if defined(LONG_BIT) +#define BITS_PER_LONG LONG_BIT +#elif defined(__i386__) +#define BITS_PER_LONG 32 +#endif + +/* + * boolean_t is enum on Linux, int on FreeBSD. + * Provide value defines. 
+ */ +#define B_FALSE 0 +#define B_TRUE 1 + +/* + * GCC 3.x static branch prediction hints + */ +#if __GNUC__ < 3 +#define __builtin_expect(x, expected_value) (x) +#endif + +#ifndef likely +#define likely(x) __builtin_expect((x), 1) +#endif + +#ifndef unlikely +#define unlikely(x) __builtin_expect((x), 0) +#endif + +/* + * ANSI and GCC extension keywords compatibility + */ +#ifndef inline +#define inline __inline__ +#endif + +#ifndef asm +#define asm __asm +#endif + +#ifndef typeof +#define typeof __typeof +#endif + +/* + * Miscellaneous limit constants + */ +#define MAX_LFS_FILESIZE 0x7fffffffffffffffLL + +/* + * Map simple functions to their FreeBSD kernel equivalents + */ +#ifndef copy_to_user +#define copy_to_user(dst, src, len) copyout((src), (dst), (len)) +#endif + +#ifndef copy_from_user +#define copy_from_user(dst, src, len) copyin((src), (dst), (len)) +#endif + +#ifndef memmove +#define memmove(dst, src, len) bcopy((src), (dst), (len)) +#endif + +#ifndef barrier +#define barrier() __asm__ __volatile__("": : :"memory") +#endif + +/* + * Map simple global variables to FreeBSD kernel equivalents + */ +#if !defined(xfs_physmem) +#define xfs_physmem physmem +#endif + +#ifndef HZ +#define HZ hz +#endif + +/* + * These should be implemented properly for all architectures + * we want to support. + */ +#define get_unaligned(ptr) (*(ptr)) +#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +/* + * Linux type-safe min/max macros. + * Both operands are converted to 'type' before the comparison so that + * mixed signed/unsigned arguments compare in the requested type. + * Arguments may be evaluated more than once. + */ +#define min_t(type,x,y) MIN((type)(x),(type)(y)) +#define max_t(type,x,y) MAX((type)(x),(type)(y)) + + +/* + * Credentials manipulation. 
+ */ +#define current_fsuid(credp) (credp)->cr_uid +#define current_fsgid(credp) (credp)->cr_groups[0] + +#endif /* __XFS_COMPAT_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_config.h b/sys/gnu/fs/xfs/FreeBSD/xfs_config.h new file mode 100644 index 000000000000..a115f5438e8a --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_config.h @@ -0,0 +1,38 @@ +#ifndef __XFS_CONFIG_H__ +#define __XFS_CONFIG_H__ + +#define HAVE_FID 1 +/* + * Linux config variables, hardcoded to values desirable for FreeBSD. + */ +#define CONFIG_SYSCTL 1 +#define CONFIG_LBD 1 +#define CONFIG_XFS_TRACE 0 + +/* + * Tracing. + */ +#if CONFIG_XFS_TRACE == 1 +#define XFS_ALLOC_TRACE 1 +#define XFS_ALLOC_TRACE 1 +#define XFS_ATTR_TRACE 1 +#define XFS_BLI_TRACE 1 +#define XFS_BMAP_TRACE 1 +#define XFS_BMBT_TRACE 1 +#define XFS_DIR_TRACE 1 +#define XFS_DIR2_TRACE 1 +#define XFS_DQUOT_TRACE 1 +#define XFS_ILOCK_TRACE 1 +#define XFS_LOG_TRACE 1 +#define XFS_RW_TRACE 1 +#endif + +/* + * XFS config defines. + */ +#define XFS_BIG_BLKNOS 1 +#define XFS_BIG_INUMS 0 + +#undef XFS_STATS_OFF + +#endif /* __XFS_CONFIG_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_cred.h b/sys/gnu/fs/xfs/FreeBSD/xfs_cred.h new file mode 100644 index 000000000000..bc599776e66d --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_cred.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_CRED_H__ +#define __XFS_CRED_H__ + +#include <sys/ucred.h> +/* + * struct cred is struct ucred on FreeBSD: cred_t is a typedef for it and the bare name cred is rewritten to ucred. + */ +typedef struct ucred cred_t; + +#define cred ucred +/* NOTE(review): both capability checks expand to the constant 1, so they always succeed and their arguments are never evaluated. */ +#define capable(cap) (1) +#define capable_cred(cr, cap) (1) + +#endif /* __XFS_CRED_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_dmistubs.c b/sys/gnu/fs/xfs/FreeBSD/xfs_dmistubs.c new file mode 100644 index 000000000000..746747efe7cc --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_dmistubs.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" + +static int nopkg(void); +/* Shared result for the stubbed entry points in this file: always ENOSYS. */ +static __inline int +nopkg() +{ + return (ENOSYS); +} +/* Initialisation hook: does no work and reports success (0). */ +int +dmapi_init (void) +{ + return (0); +} +/* Teardown hook: empty. */ +void +dmapi_uninit (void) +{ +} +/* Data event stub: takes no arguments and returns nopkg(). */ +int dm_data_event(void); +int +dm_data_event (void) +{ + return nopkg(); +} +/* Namespace event stub: takes no arguments and returns nopkg(). */ +int dm_namesp_event(void); +int +dm_namesp_event (void) +{ + return nopkg(); +} + +/* The following stubs are for routines needed for the X/Open + * version of DMAPI. 
 + */ +int xfs_dm_mount(xfs_vfs_t *, xfs_vnode_t *, char *, char *); /* stub: arguments unused, result is nopkg() */ +int +xfs_dm_mount( + xfs_vfs_t *vfsp, + xfs_vnode_t *mvp, + char *dir_name, + char *fsname) +{ + return nopkg(); +} +/* Destroy event stub: arguments unused, result is nopkg(). */ +int +dm_send_destroy_event(bhv_desc_t *bdp, dm_right_t vp_right); +int +dm_send_destroy_event(bhv_desc_t *bdp, dm_right_t vp_right) +{ + return nopkg(); +} +/* Mount event stub: arguments unused, result is nopkg(). */ +int +dm_send_mount_event(xfs_vfs_t *vfsp, dm_right_t vfsp_right, bhv_desc_t *bdp, + dm_right_t vp_right, bhv_desc_t *rootbdp, dm_right_t rootvp_right, + char *name1, char *name2); +int +dm_send_mount_event(xfs_vfs_t *vfsp, dm_right_t vfsp_right, bhv_desc_t *bdp, + dm_right_t vp_right, bhv_desc_t *rootbdp, dm_right_t rootvp_right, + char *name1, char *name2) +{ + return nopkg(); +} + +/* Namespace event stub: arguments unused, result is nopkg(). */ +int +dm_send_namesp_event(dm_eventtype_t event, bhv_desc_t *bdp1, + dm_right_t vp1_right, bhv_desc_t *bdp2, dm_right_t vp2_right, + char *name1, char *name2, mode_t mode, int retcode, int flags); +int +dm_send_namesp_event(dm_eventtype_t event, bhv_desc_t *bdp1, + dm_right_t vp1_right, bhv_desc_t *bdp2, dm_right_t vp2_right, + char *name1, char *name2, mode_t mode, int retcode, int flags) +{ + return nopkg(); +} + +/* Unmount event stub: returns void and has an empty body, so nothing is reported to the caller. */ +void +dm_send_unmount_event(xfs_vfs_t *vfsp, xfs_vnode_t *vp, dm_right_t vfsp_right, + mode_t mode, int retcode, int flags); +void +dm_send_unmount_event(xfs_vfs_t *vfsp, xfs_vnode_t *vp, dm_right_t vfsp_right, + mode_t mode, int retcode, int flags) +{ +} + +/* Handle lookup stub: handlep is never written, result is nopkg(). */ +int +dm_vp_to_handle (xfs_vnode_t *vp, xfs_handle_t *handlep); +int +dm_vp_to_handle (xfs_vnode_t *vp, xfs_handle_t *handlep) +{ + return nopkg(); +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_freebsd.h b/sys/gnu/fs/xfs/FreeBSD/xfs_freebsd.h new file mode 100644 index 000000000000..d7a30d6ecd0d --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_freebsd.h @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_FREEBSD__ +#define __XFS_FREEBSD__ + +#include <xfs_config.h> +#include <xfs_compat.h> + +/* + * Some types are conditional depending on the target system. + * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. + * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well + * as requiring XFS_BIG_BLKNOS to be set. 
 + */ +#define XFS_BIG_BLKNOS 1 +#define XFS_BIG_INUMS 0 + +#include <xfs_types.h> +#include <xfs_arch.h> + +#include <support/atomic.h> +#include <support/kmem.h> +#include <support/mrlock.h> +#include <support/spin.h> +#include <support/sv.h> +#include <support/ktrace.h> +#include <support/mutex.h> +#include <support/rwsem.h> +#include <support/rwlock.h> +#include <support/sema.h> +#include <support/debug.h> +#include <support/move.h> +#include <support/uuid.h> +#include <support/time.h> + +#include <xfs_behavior.h> +#include <xfs_cred.h> +#include <xfs_vfs.h> +#include <xfs_vnode.h> +#include <xfs_stats.h> +#include <xfs_sysctl.h> +#include <xfs_iops.h> +#include <xfs_super.h> +#include <xfs_fs_subr.h> +#include <xfs_buf.h> +#include <xfs_frw.h> +#include <xfs_log.h> + +/* + * Feature macros (disable/enable) + */ +#undef HAVE_REFCACHE /* Do not use refcache. */ +#undef HAVE_SENDFILE /* sendfile(2) is available in FreeBSD. */ + +#ifndef EVMS_MAJOR +#define EVMS_MAJOR 117 +#endif +/* Short names for the .val members of the xfs_params tunables. */ +#define xfs_refcache_size xfs_params.refcache_size.val +#define xfs_refcache_purge_count xfs_params.refcache_purge.val +#define restricted_chown xfs_params.restrict_chown.val +#define irix_sgid_inherit xfs_params.sgid_inherit.val +#define irix_symlink_mode xfs_params.symlink_mode.val +#define xfs_panic_mask xfs_params.panic_mask.val +#define xfs_error_level xfs_params.error_level.val +#define xfs_syncd_interval xfs_params.sync_interval.val +#define xfs_probe_dmapi xfs_params.probe_dmapi.val +#define xfs_probe_ioops xfs_params.probe_ioops.val +#define xfs_probe_quota xfs_params.probe_quota.val +#define xfs_stats_clear xfs_params.stats_clear.val +#define xfs_inherit_sync xfs_params.inherit_sync.val +#define xfs_inherit_nodump xfs_params.inherit_nodump.val +#define xfs_inherit_noatime xfs_params.inherit_noatim.val +#define xfs_flush_interval xfs_params.flush_interval.val +#define xfs_age_buffer xfs_params.age_buffer.val +#define xfs_io_bypass xfs_params.io_bypass.val + +#define 
current_cpu() smp_processor_id() +#define current_pid() (curthread->td_proc->p_pid) +/* DPPSHFT is PAGE_SHIFT less 9 bits, i.e. 512-byte units per page: dtop rounds a count of such units up to whole pages, dtopt truncates, dpoff is the remainder within a page. */ +#define NBPP PAGE_SIZE +#define DPPSHFT (PAGE_SHIFT - 9) +#define NDPP (1 << (PAGE_SHIFT - 9)) +#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) +#define dtopt(DD) ((DD) >> DPPSHFT) +#define dpoff(DD) ((DD) & (NDPP-1)) + +#define NBBY 8 /* number of bits per byte */ +#define NBPC PAGE_SIZE /* Number of bytes per click */ +#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ + +/* + * Size of block device i/o is parameterized here. + * Currently the system supports page-sized i/o. + */ +#define BLKDEV_IOSHIFT BPCSHIFT +#ifndef BLKDEV_IOSIZE +#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) +#else +# if NBPC != BLKDEV_IOSIZE +# error Wrong BLKDEV_IOSIZE +# endif +#endif +/* number of BB's per block device block */ +#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) + +/* bytes to clicks */ +#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) +#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) +#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) +#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) +#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) + +/* off_t bytes to clicks */ +#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) +#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) + +/* clicks to off_t bytes */ +#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) + +/* clicks to bytes (the btoct line in this group is a bytes-to-clicks shift that repeats the definition given earlier) */ +#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) +#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) +#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT) + +#ifndef CELL_CAPABLE +#define FSC_NOTIFY_NAME_CHANGED(vp) +#endif + +#ifndef ENOATTR +#define ENOATTR ENODATA /* Attribute not found */ +#endif + +/* Note: EWRONGFS never visible outside the kernel */ +#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ + +/* + * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't + * return codes out of its known range in errno. 
 + * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't + * conflict with any code we use already or any code a driver may use) + * XXX Some options (currently we do #2): + * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] + * 2/ 990 ["Unknown error 990"] + * 3/ EUCLEAN ["Structure needs cleaning"] + * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] + */ +#define EFSCORRUPTED 990 /* Filesystem is corrupted */ +/* SYNCHRONIZE() expands to barrier(); __return_address is the return address of the current function. */ +#define SYNCHRONIZE() barrier() +#define __return_address __builtin_return_address(0) + +/* + * IRIX (BSD) quotactl makes use of separate commands for user/group, + * whereas on Linux the syscall encodes this information into the cmd + * field (see the QCMD macro in quota.h). These macros help keep the + * code portable - they are not visible from the syscall interface. + */ +#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ +#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ + +/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ +/* we may well need to fine-tune this if it ever becomes an issue. */ +#define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */ +#define ndquot DQUOT_MAX_HEURISTIC /* a fixed 1024 here rather than the IRIX formula quoted above */ + +/* IRIX uses the current size of the name cache to guess a good value */ +/* - this isn't the same but is a good enough starting point for now. 
*/ +#define DQUOT_HASH_HEURISTIC files_stat.nr_files + +/* IRIX inodes maintain the project ID also, zero this field on Linux */ +#define DEFAULT_PROJID 0 +#define dfltprid DEFAULT_PROJID + +#define FINVIS 0x0102 /* don't update timestamps - XFS */ + +#define howmany(x, y) (((x)+((y)-1))/(y)) +#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) + +/* + * Juggle IRIX device numbers - still used in ondisk structures + */ +#define XFS_DEV_BITSMAJOR 14 +#define XFS_DEV_BITSMINOR 18 +#define XFS_DEV_MAXMAJ 0x1ff +#define XFS_DEV_MAXMIN 0x3ffff +#define XFS_DEV_MAJOR(dev) ((int)(((unsigned)(dev)>>XFS_DEV_BITSMINOR) \ + & XFS_DEV_MAXMAJ)) +#define XFS_DEV_MINOR(dev) ((int)((dev)&XFS_DEV_MAXMIN)) +#define XFS_MKDEV(major,minor) ((xfs_dev_t)(((major)<<XFS_DEV_BITSMINOR) \ + | (minor&XFS_DEV_MAXMIN))) + +#define XFS_DEV_TO_KDEVT(dev) mk_kdev(XFS_DEV_MAJOR(dev),XFS_DEV_MINOR(dev)) + + +/* Produce a kernel stack trace */ + +static inline void xfs_stack_trace(void) +{ + kdb_backtrace(); +} + + +/* Move the kernel do_div definition off to one side */ + +#if defined __i386__ +/* For ia32 we need to pull some tricks to get past various versions + * of the compiler which do not like us using do_div in the middle + * of large functions. 
 + */ +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + /* n is the operand size in bytes (do_div passes sizeof): 4 and 8 are handled; the quotient is stored back through a and the remainder returned. */ + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +/* Other architectures: do_div in plain C; it replaces n with the quotient and evaluates to the remainder. */ +#define do_div(n, base) ({\ + int __res; \ + __res = ((__u64)(n)) % (__u32)(base); \ + (n) = ((__u64)(n)) / (__u32)(base); \ + __res; }) + +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif +/* From here on do_div and do_mod dispatch on sizeof(a) to the helpers above. */ +#undef do_div +#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) +/* Round x up to a multiple of y, using do_div for the 64-bit division. */ +static inline __uint64_t roundup_64(__uint64_t x, 
__uint32_t y) +{ + x += y - 1; + do_div(x, y); + return(x * y); +} +/* NOTE(review): returns ffsl(~val) unchanged. ffsl() numbers bit positions from 1 and yields 0 when no bit is set, whereas the Linux ffz() this stands in for is 0-based - confirm which convention the callers expect. */ +static inline unsigned long ffz(unsigned long val) +{ + val = ffsl(~val); + return val; +} + +#endif /* __XFS_FREEBSD__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_frw.c b/sys/gnu/fs/xfs/FreeBSD/xfs_frw.c new file mode 100644 index 000000000000..ac9f2dcada41 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_frw.c @@ -0,0 +1,1040 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
 + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +/* + * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff) + * + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_refcache.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_inode_item.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_iomap.h" +/* Trace helpers, built only when XFS_RW_TRACE is defined. Each adds one ktrace_enter() record to ip->i_rwtrace, passing 64-bit values as separate high and low 32-bit halves. */ +#if defined(XFS_RW_TRACE) +void +xfs_rw_enter_trace( + int tag, + xfs_iocore_t *io, + const char *buf, + size_t size, + loff_t offset, + int ioflags) +{ + xfs_inode_t *ip = XFS_IO_INODE(io); + /* No trace buffer attached to this inode: nothing to record. */ + if (ip->i_rwtrace == NULL) + return; + ktrace_enter(ip->i_rwtrace, + (void *)(unsigned long)tag, + (void *)ip, + (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), + (void *)(__psint_t)buf, + (void *)((unsigned long)size), + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)ioflags), + (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(io->io_new_size 
& 0xffffffff)), + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL); +} +/* Cache-invalidation record: logs offset, len, first and last under the XFS_INVAL_CACHED tag. */ +void +xfs_inval_cached_trace( + xfs_iocore_t *io, + xfs_off_t offset, + xfs_off_t len, + xfs_off_t first, + xfs_off_t last) +{ + xfs_inode_t *ip = XFS_IO_INODE(io); + /* No trace buffer attached to this inode: nothing to record. */ + if (ip->i_rwtrace == NULL) + return; + ktrace_enter(ip->i_rwtrace, + (void *)(__psint_t)XFS_INVAL_CACHED, + (void *)ip, + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)((len >> 32) & 0xffffffff)), + (void *)((unsigned long)(len & 0xffffffff)), + (void *)((unsigned long)((first >> 32) & 0xffffffff)), + (void *)((unsigned long)(first & 0xffffffff)), + (void *)((unsigned long)((last >> 32) & 0xffffffff)), + (void *)((unsigned long)(last & 0xffffffff)), + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL); +} +#endif + +/* + * xfs_iozero + * + * xfs_iozero clears the specified range of buffer supplied, + * and marks all the affected blocks as valid and modified. If + * an affected block is not allocated, it will be allocated. If + * an affected block is not completely overwritten, and is not + * valid before the operation, it will be read from disk before + * being partially zeroed. 
 + */ +STATIC int +xfs_iozero( + xfs_vnode_t *vp, /* vnode */ + loff_t pos, /* offset in file */ + size_t count, /* size of data to zero */ + loff_t end_size) /* max file size to set */ +{ +#if XXXKAN /* Linux page-cache implementation; NOTE(review): it refers to ip, which is not declared in this function */ + unsigned bytes; + struct page *page; + struct address_space *mapping; + char *kaddr; + int status; + + mapping = ip->i_mapping; + do { + unsigned long index, offset; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + status = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + break; + + kaddr = kmap(page); + status = mapping->a_ops->prepare_write(NULL, page, offset, + offset + bytes); + if (status) { + goto unlock; + } + + memset((void *) (kaddr + offset), 0, bytes); + flush_dcache_page(page); + status = mapping->a_ops->commit_write(NULL, page, offset, + offset + bytes); + if (!status) { + pos += bytes; + count -= bytes; + if (pos > i_size_read(ip)) + i_size_write(ip, pos < end_size ? pos : end_size); + } + +unlock: + kunmap(page); + unlock_page(page); + page_cache_release(page); + if (status) + break; + } while (count); + + return (-status); +#endif /* XXXKAN */ + return (0); /* the only statement left when the XXXKAN block is compiled out: nothing is zeroed and success is reported */ +} + +/* + * xfs_inval_cached_pages + * + * This routine is responsible for keeping direct I/O and buffered I/O + * somewhat coherent. From here we make sure that we're at least + * temporarily holding the inode I/O lock exclusively and then call + * the page cache to flush and invalidate any cached pages. If there + * are no cached pages this routine will be very quick. + */ +void +xfs_inval_cached_pages( + xfs_vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, + int write, + int relock) +{ + xfs_mount_t *mp; + + if (!VN_CACHED(vp)) { + return; + } + + mp = io->io_mount; + + /* + * We need to get the I/O lock exclusively in order + * to safely invalidate pages and mappings. 
 + */ + if (relock) { + XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED); + XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL); + } + + /* Writing beyond EOF creates a hole that must be zeroed */ + if (write && (offset > XFS_SIZE(mp, io))) { + xfs_fsize_t isize; + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + isize = XFS_SIZE(mp, io); + if (offset > isize) { + xfs_zero_eof(vp, io, offset, isize, offset); /* NOTE(review): the int result of xfs_zero_eof is not checked here */ + } + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + } + /* Trace, then flush and invalidate starting at offset rounded down with ctooff(offtoct(offset)). */ + xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1); + XVOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED); + if (relock) { + XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); + } +} + +int +xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag); + +ssize_t /* bytes read, or (-) error; NOTE(review): the early exits below return positive EINVAL, EFBIG and EIO */ +xfs_read( + bhv_desc_t *bdp, + uio_t *uio, + int ioflags, + cred_t *credp) +{ + ssize_t ret, size; + xfs_fsize_t n; + xfs_inode_t *ip; + xfs_mount_t *mp; + + ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; + + XFS_STATS_INC(xs_read_calls); + /* NOTE(review): buf is neither a parameter nor a local of xfs_read(); confirm what the alignment test below actually checks. */ + if (unlikely(ioflags & IO_ISDIRECT)) { + if (((__psint_t)buf & BBMASK) || + (uio->uio_offset & mp->m_blockmask) || + (uio->uio_resid & mp->m_blockmask)) { + if (uio->uio_offset >= ip->i_d.di_size) { + return (0); + } + return EINVAL; + } + } + + if (uio->uio_resid == 0) + return 0; + n = XFS_MAXIOFFSET(mp) - uio->uio_offset; + if (n <= 0) + return EFBIG; + + size = (n < uio->uio_resid)? 
n : uio->uio_resid; + + if (XFS_FORCED_SHUTDOWN(mp)) { + return EIO; + } + /* Take the shared I/O lock unless the caller says it is already held. */ + if (!(ioflags & IO_ISLOCKED)) + xfs_ilock(ip, XFS_IOLOCK_SHARED); +#if XXXKAN + if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && + !(ioflags & IO_INVIS)) { + int error; + vrwlock_t locktype = VRWLOCK_READ; + int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); + + error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), + uio->uio_offset, size, dmflags, &locktype); + if (error) { + if (!(ioflags & IO_ISLOCKED)) + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return (error); + } + } + + if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_rw_enter_trace(XFS_DIORD_ENTER, &ip->i_iocore, + buf, size, *offset, ioflags); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22) + xfs_off_t pos = uio->uio_offset; + + ret = generic_file_direct_IO(READ, file, buf, size, pos); + if (ret > 0) + uio->uio_offset = pos + ret; + + UPDATE_ATIME(file->f_dentry->d_inode); +#else + ret = generic_file_read(file, buf, size, offset); +#endif + } else { + xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, + buf, size, *offset, ioflags); + ret = generic_file_read(file, buf, size, offset); + } +#else /* XXXKAN */ + ret = xfs_read_file(mp, ip, uio, ioflags); /* the whole request is handed to xfs_read_file(); size computed above is not passed */ +#endif /* XXXKAN */ + + if (!(ioflags & IO_ISLOCKED)) + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + /* NOTE(review): ret is added to the byte counter whatever xfs_read_file() returned. */ + XFS_STATS_ADD(xs_read_bytes, ret); + + if (likely((ioflags & IO_INVIS) == 0)) { + xfs_ichgtime(ip, XFS_ICHGTIME_ACC); + } + + return ret; +} + +/* + * This routine is called to handle zeroing any space in the last + * block of the file that is beyond the EOF. We do this since the + * size is being increased without writing anything to that block + * and we don't want anyone to read the garbage on the disk. 
 + */ +STATIC int /* error (positive) */ +xfs_zero_last_block( + xfs_vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, + xfs_fsize_t isize, + xfs_fsize_t end_size) +{ + xfs_fileoff_t last_fsb; + xfs_mount_t *mp; + int nimaps; + int zero_offset; + int zero_len; + int isize_fsb_offset; + int error = 0; + xfs_bmbt_irec_t imap; + loff_t loff; + size_t lsize; + /* Preconditions, asserted below: io->io_lock is held for update and offset lies beyond isize. */ + ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); + ASSERT(offset > isize); + + mp = io->io_mount; + + isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); + if (isize_fsb_offset == 0) { + /* + * There are no extra bytes in the last block on disk to + * zero, so return. + */ + return 0; + } + /* Look up the single mapping covering the block that contains isize. */ + last_fsb = XFS_B_TO_FSBT(mp, isize); + nimaps = 1; + error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, + &nimaps, NULL); + if (error) { + return error; + } + ASSERT(nimaps > 0); + /* + * If the block underlying isize is just a hole, then there + * is nothing to zero. + */ + if (imap.br_startblock == HOLESTARTBLOCK) { + return 0; + } + /* + * Zero the part of the last block beyond the EOF, and write it + * out sync. We need to drop the ilock while we do this so we + * don't deadlock when the buffer cache calls back to us. + */ + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); + loff = XFS_FSB_TO_B(mp, last_fsb); + lsize = XFS_FSB_TO_B(mp, 1); /* assigned but not read again in this function */ + /* Zero from the old EOF to the end of its filesystem block. */ + zero_offset = isize_fsb_offset; + zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset; + + error = xfs_iozero(vp, loff + zero_offset, zero_len, end_size); + /* Retake the lock dropped above before returning, whatever xfs_iozero reported. */ + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + ASSERT(error >= 0); + return error; +} + +/* + * Zero any on disk space between the current EOF and the new, + * larger EOF. This handles the normal case of zeroing the remainder + * of the last block in the file and the unusual case of zeroing blocks + * out beyond the size of the file. This second case only happens + * with fixed size extents and when the system crashes before the inode + * size was updated but after blocks were allocated. 
If fill is set, + * then any holes in the range are filled and zeroed. If not, the holes + * are left alone as holes. + */ + +int /* error (positive) */ +xfs_zero_eof( + xfs_vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, /* starting I/O offset */ + xfs_fsize_t isize, /* current inode size */ + xfs_fsize_t end_size) /* terminal inode size */ +{ + xfs_fileoff_t start_zero_fsb; + xfs_fileoff_t end_zero_fsb; + xfs_fileoff_t prev_zero_fsb; + xfs_fileoff_t zero_count_fsb; + xfs_fileoff_t last_fsb; + xfs_extlen_t buf_len_fsb; + xfs_extlen_t prev_zero_count; + xfs_mount_t *mp; + int nimaps; + int error = 0; + xfs_bmbt_irec_t imap; + loff_t loff; + size_t lsize; + + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + + mp = io->io_mount; + + /* + * First handle zeroing the block on which isize resides. + * We only zero a part of that block so it is handled specially. + */ + error = xfs_zero_last_block(vp, io, offset, isize, end_size); + if (error) { + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + return error; + } + + /* + * Calculate the range between the new size and the old + * where blocks needing to be zeroed may exist. To get the + * block where the last byte in the file currently resides, + * we need to subtract one from the size and truncate back + * to a block boundary. We subtract 1 in case the size is + * exactly on a block boundary. + */ + last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; + start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); + end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); + ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); + if (last_fsb == end_zero_fsb) { + /* + * The size was only incremented on its last block. + * We took care of that above, so just return. 
+ */ + return 0; + } + + ASSERT(start_zero_fsb <= end_zero_fsb); + prev_zero_fsb = NULLFILEOFF; + prev_zero_count = 0; + while (start_zero_fsb <= end_zero_fsb) { + nimaps = 1; + zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; + error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, + 0, NULL, 0, &imap, &nimaps, NULL); + if (error) { + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + return error; + } + ASSERT(nimaps > 0); + + if (imap.br_state == XFS_EXT_UNWRITTEN || + imap.br_startblock == HOLESTARTBLOCK) { + /* + * This loop handles initializing pages that were + * partially initialized by the code below this + * loop. It basically zeroes the part of the page + * that sits on a hole and sets the page as P_HOLE + * and calls remapf if it is a mapped file. + */ + prev_zero_fsb = NULLFILEOFF; + prev_zero_count = 0; + start_zero_fsb = imap.br_startoff + + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + continue; + } + + /* + * There are blocks in the range requested. + * Zero them a single write at a time. We actually + * don't zero the entire range returned if it is + * too big and simply loop around to get the rest. + * That is not the most efficient thing to do, but it + * is simple and this path should not be exercised often. + */ + buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, + mp->m_writeio_blocks << 8); + /* + * Drop the inode lock while we're doing the I/O. + * We'll still have the iolock to protect us. 
+ */ + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + + loff = XFS_FSB_TO_B(mp, start_zero_fsb); + lsize = XFS_FSB_TO_B(mp, buf_len_fsb); + + error = xfs_iozero(vp, loff, lsize, end_size); + + if (error) { + goto out_lock; + } + + prev_zero_fsb = start_zero_fsb; + prev_zero_count = buf_len_fsb; + start_zero_fsb = imap.br_startoff + buf_len_fsb; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + } + + return 0; + +out_lock: + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + ASSERT(error >= 0); + return error; +} + +ssize_t /* bytes written, or (-) error */ +xfs_write( + bhv_desc_t *bdp, + uio_t *uiop, + int ioflags, + cred_t *credp) +{ +#if XXXKAN + xfs_inode_t *xip; + xfs_mount_t *mp; + ssize_t ret; + int error = 0; + xfs_fsize_t isize, new_size; + xfs_fsize_t n, limit; + xfs_iocore_t *io; + xfs_vnode_t *vp; + int iolock; + int eventsent = 0; + vrwlock_t locktype; + + XFS_STATS_INC(xs_write_calls); + + vp = BHV_TO_VNODE(bdp); + xip = XFS_BHVTOI(bdp); + + if (size == 0) + return 0; + + io = &xip->i_iocore; + mp = io->io_mount; + + xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE); + + if (XFS_FORCED_SHUTDOWN(xip->i_mount)) { + return EIO; + } + + if (unlikely(ioflags & IO_ISDIRECT)) { + if (((__psint_t)buf & BBMASK) || + (*offset & mp->m_blockmask) || + (size & mp->m_blockmask)) { + return EINVAL; + } + iolock = XFS_IOLOCK_SHARED; + locktype = VRWLOCK_WRITE_DIRECT; + } else { + if (io->io_flags & XFS_IOCORE_RT) + return EINVAL; + iolock = XFS_IOLOCK_EXCL; + locktype = VRWLOCK_WRITE; + } + + if (ioflags & IO_ISLOCKED) + iolock = 0; + + xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); + + isize = xip->i_d.di_size; + limit = XFS_MAXIOFFSET(mp); + + if (file->f_flags & O_APPEND) + *offset = isize; + +start: + n = limit - *offset; + if (n <= 0) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + return EFBIG; + } + if (n < size) + size = n; + + new_size = *offset + size; + if (new_size > isize) { + io->io_new_size = new_size; + 
} + + if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && + !(ioflags & IO_INVIS) && !eventsent)) { + loff_t savedsize = *offset; + int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); + + xfs_iunlock(xip, XFS_ILOCK_EXCL); + error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, + *offset, size, + dmflags, &locktype); + if (error) { + if (iolock) xfs_iunlock(xip, iolock); + return -error; + } + xfs_ilock(xip, XFS_ILOCK_EXCL); + eventsent = 1; + + /* + * The iolock was dropped and reacquired in XFS_SEND_DATA + * so we have to recheck the size when appending. + * We will only "goto start;" once, since having sent the + * event prevents another call to XFS_SEND_DATA, which is + * what allows the size to change in the first place. + */ + if ((file->f_flags & O_APPEND) && + savedsize != xip->i_d.di_size) { + *offset = isize = xip->i_d.di_size; + goto start; + } + } + + /* + * If the offset is beyond the size of the file, we have a couple + * of things to do. First, if there is already space allocated + * we need to either create holes or zero the disk or ... + * + * If there is a page where the previous size lands, we need + * to zero it out up to the new size. + */ + + if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { + error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, + isize, *offset + size); + if (error) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + return(-error); + } + } + xfs_iunlock(xip, XFS_ILOCK_EXCL); + + /* + * If we're writing the file then make sure to clear the + * setuid and setgid bits if the process is not being run + * by root. This keeps people from modifying setuid and + * setgid binaries. 
+ */ + + if (((xip->i_d.di_mode & S_ISUID) || + ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == + (S_ISGID | S_IXGRP))) && + !capable(CAP_FSETID)) { + error = xfs_write_clear_setuid(xip); + if (error) { + xfs_iunlock(xip, iolock); + return -error; + } + } + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22) + if ((ssize_t) size < 0) { + ret = EINVAL; + goto error; + } + + if (!access_ok(VERIFY_READ, buf, size)) { + ret = EINVAL; + goto error; + } +#else +#define do_generic_direct_write(file, buf, size, offset) \ + generic_file_write_nolock(file, buf, size, offset) +#define do_generic_file_write(file, buf, size, offset) \ + generic_file_write_nolock(file, buf, size, offset) +#endif + +retry: + if (unlikely(ioflags & IO_ISDIRECT)) { + loff_t pos = *offset; + struct address_space *mapping = file->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; + + ret = precheck_file_write(file, inode, &size, &pos); + if (ret || size == 0) + goto error; + + xfs_inval_cached_pages(vp, io, pos, 1, 1); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + /* mark_inode_dirty_sync(inode); - we do this later */ + + xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, buf, size, pos, ioflags); + ret = generic_file_direct_IO(WRITE, file, (char *)buf, size, pos); + if (ret > 0) + *offset += ret; + } else { + xfs_rw_enter_trace(XFS_WRITE_ENTER, io, buf, size, *offset, ioflags); + ret = do_generic_file_write(file, buf, size, offset); + } + + if (unlikely(ioflags & IO_INVIS)) { + /* generic_file_write updates the mtime/ctime but we need + * to undo that because this I/O was supposed to be + * invisible. 
+ */ + struct inode *inode = LINVFS_GET_IP(vp); + inode->i_mtime = xip->i_d.di_mtime.t_sec; + inode->i_ctime = xip->i_d.di_ctime.t_sec; + } else { + xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } + + if ((ret == -ENOSPC) && + DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && + !(ioflags & IO_INVIS)) { + + xfs_rwunlock(bdp, locktype); + error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, + DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, + 0, 0, 0); /* Delay flag intentionally unused */ + if (error) + return -error; + xfs_rwlock(bdp, locktype); + *offset = xip->i_d.di_size; + goto retry; + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22) +error: +#endif + if (ret <= 0) { + if (iolock) + xfs_rwunlock(bdp, locktype); + return ret; + } + + XFS_STATS_ADD(xs_write_bytes, ret); + + if (*offset > xip->i_d.di_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + if (*offset > xip->i_d.di_size) { + struct inode *inode = LINVFS_GET_IP(vp); + + xip->i_d.di_size = *offset; + i_size_write(inode, *offset); + xip->i_update_core = 1; + xip->i_update_size = 1; + mark_inode_dirty_sync(inode); + } + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + + /* Handle various SYNC-type writes */ + if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { + + /* + * If we're treating this as O_DSYNC and we have not updated the + * size, force the log. + */ + + if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) + && !(xip->i_update_size)) { + /* + * If an allocation transaction occurred + * without extending the size, then we have to force + * the log up the proper point to ensure that the + * allocation is permanent. We can't count on + * the fact that buffered writes lock out direct I/O + * writes - the direct I/O write could have extended + * the size nontransactionally, then finished before + * we started. xfs_write_file will think that the file + * didn't grow but the update isn't safe unless the + * size change is logged. 
+ * + * Force the log if we've committed a transaction + * against the inode or if someone else has and + * the commit record hasn't gone to disk (e.g. + * the inode is pinned). This guarantees that + * all changes affecting the inode are permanent + * when we return. + */ + + xfs_inode_log_item_t *iip; + xfs_lsn_t lsn; + + iip = xip->i_itemp; + if (iip && iip->ili_last_lsn) { + lsn = iip->ili_last_lsn; + xfs_log_force(mp, lsn, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } else if (xfs_ipincount(xip) > 0) { + xfs_log_force(mp, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } + + } else { + xfs_trans_t *tp; + + /* + * O_SYNC or O_DSYNC _with_ a size update are handled + * the same way. + * + * If the write was synchronous then we need to make + * sure that the inode modification time is permanent. + * We'll have updated the timestamp above, so here + * we use a synchronous transaction to log the inode. + * It's not fast, but it's necessary. + * + * If this a dsync write and the size got changed + * non-transactionally, then we need to ensure that + * the size change gets logged in a synchronous + * transaction. + */ + + tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); + if ((error = xfs_trans_reserve(tp, 0, + XFS_SWRITE_LOG_RES(mp), + 0, 0, 0))) { + /* Transaction reserve failed */ + xfs_trans_cancel(tp, 0); + } else { + /* Transaction reserve successful */ + xfs_ilock(xip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, xip); + xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0); + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + } + } /* (ioflags & O_SYNC) */ + + /* + * If we are coming from an nfsd thread then insert into the + * reference cache. 
+ */ + + if (!strcmp(current->comm, "nfsd")) + xfs_refcache_insert(xip); + + /* Drop lock this way - the old refcache release is in here */ + if (iolock) + xfs_rwunlock(bdp, locktype); + + return(ret); +#endif /* XXXKAN */ + return (0); +} + +/* + * Initiate IO on given buffer. + */ +int +xfs_buf_iorequest(struct xfs_buf *bp) +{ + bp->b_flags &= ~(B_INVAL|B_DONE); + bp->b_ioflags &= ~BIO_ERROR; + + if (bp->b_flags & B_ASYNC) + BUF_KERNPROC(bp); + + if (bp->b_vp == NULL) { + if (bp->b_iocmd == BIO_WRITE) { + bp->b_flags &= ~(B_DELWRI | B_DEFERRED); + bufobj_wref(bp->b_bufobj); + } + + bp->b_iooffset = (bp->b_blkno << BBSHIFT); + bstrategy(bp); + } else { + if (bp->b_iocmd == BIO_WRITE) { + /* Mark the buffer clean */ + bundirty(bp); + bufobj_wref(bp->b_bufobj); + vfs_busy_pages(bp, 1); + } else if (bp->b_iocmd == BIO_READ) { + vfs_busy_pages(bp, 0); + } + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + } + return 0; +} + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb(struct xfs_buf *bp) +{ + xfs_mount_t *mp; + + mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_buf_iorequest(bp); + return 0; + } else { + xfs_buftrace("XFS__BDSTRAT IOERROR", bp); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. 
+ */ + if (XFS_BUF_IODONE_FUNC(bp) == NULL && + (XFS_BUF_ISREAD(bp)) == 0) + return (xfs_bioerror_relse(bp)); + else + return (xfs_bioerror(bp)); + } +} + + +/* + * Asserts that the inode is a regular file and that its realtime flags + * agree, then hands the request to xfs_iomap() on the i_iocore of the inode. + */ +int +xfs_bmap(bhv_desc_t *bdp, + xfs_off_t offset, + ssize_t count, + int flags, + xfs_iomap_t *iomapp, + int *niomaps) +{ + xfs_inode_t *ip = XFS_BHVTOI(bdp); + xfs_iocore_t *io = &ip->i_iocore; + + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == + ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); + + return xfs_iomap(io, offset, count, flags, iomapp, niomaps); +} + +/* + * Wrapper around bdstrat so that we can stop data + * from going to disk in case we are shutting down the filesystem. + * Typically user data goes thru this path; one of the exceptions + * is the superblock. + * + * Returns the result of xfs_buf_iorequest() while the mount is not + * forcibly shut down; otherwise traces the buffer and returns the + * result of xfs_bioerror_relse(). + */ +int +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + ASSERT(mp); + if (!XFS_FORCED_SHUTDOWN(mp)) { + /* Grio redirection would go here + * if (XFS_BUF_IS_GRIO(bp)) { + */ + + return xfs_buf_iorequest(bp); + } + + xfs_buftrace("XFSBDSTRAT IOERROR", bp); + return (xfs_bioerror_relse(bp)); +} + +/* + * If the underlying (data/log/rt) device is readonly, there are some + * operations that cannot proceed. + */ +int +xfs_dev_is_read_only( + xfs_mount_t *mp, + char *message) +{ + if (xfs_readonly_buftarg(mp->m_ddev_targp) || + xfs_readonly_buftarg(mp->m_logdev_targp) || + (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { + cmn_err(CE_NOTE, + "XFS: %s required on read-only device.", message); + cmn_err(CE_NOTE, + "XFS: write access unavailable, cannot proceed."); + return EROFS; + } + return 0; +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_frw.h b/sys/gnu/fs/xfs/FreeBSD/xfs_frw.h new file mode 100644 index 000000000000..c08abceef176 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_frw.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_FRW_H__ +#define __XFS_FRW_H__ + +struct xfs_vnode; +struct bhv_desc; +struct xfs_mount; +struct xfs_iocore; +struct xfs_inode; +struct xfs_bmbt_irec; +struct xfs_buf; +struct xfs_iomap; + +#if defined(XFS_RW_TRACE) +/* + * Defines for the trace mechanisms in xfs_lrw.c. 
+ */ +#define XFS_RW_KTRACE_SIZE 128 + +#define XFS_READ_ENTER 1 +#define XFS_WRITE_ENTER 2 +#define XFS_IOMAP_READ_ENTER 3 +#define XFS_IOMAP_WRITE_ENTER 4 +#define XFS_IOMAP_READ_MAP 5 +#define XFS_IOMAP_WRITE_MAP 6 +#define XFS_IOMAP_WRITE_NOSPACE 7 +#define XFS_ITRUNC_START 8 +#define XFS_ITRUNC_FINISH1 9 +#define XFS_ITRUNC_FINISH2 10 +#define XFS_CTRUNC1 11 +#define XFS_CTRUNC2 12 +#define XFS_CTRUNC3 13 +#define XFS_CTRUNC4 14 +#define XFS_CTRUNC5 15 +#define XFS_CTRUNC6 16 +#define XFS_BUNMAPI 17 +#define XFS_INVAL_CACHED 18 +#define XFS_DIORD_ENTER 19 +#define XFS_DIOWR_ENTER 20 +#define XFS_SENDFILE_ENTER 21 +#define XFS_WRITEPAGE_ENTER 22 +#define XFS_RELEASEPAGE_ENTER 23 +#define XFS_IOMAP_ALLOC_ENTER 24 +#define XFS_IOMAP_ALLOC_MAP 25 +#define XFS_IOMAP_UNWRITTEN 26 +extern void xfs_rw_enter_trace(int, struct xfs_iocore *, + const char *, size_t, loff_t, int); +extern void xfs_inval_cached_trace(struct xfs_iocore *, + xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); +#else +#define xfs_rw_enter_trace(tag, io, buf, size, offset, ioflags) +#define xfs_inval_cached_trace(io, offset, len, first, last) +#endif + +/* + * Maximum count of bmaps used by read and write paths. + */ +#define XFS_MAX_RW_NBMAPS 4 + +extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int, + struct xfs_iomap *, int *); +extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + +extern int xfs_zero_eof(struct xfs_vnode *, struct xfs_iocore *, xfs_off_t, + xfs_fsize_t, xfs_fsize_t); +extern void xfs_inval_cached_pages(struct xfs_vnode*, struct xfs_iocore *, + xfs_off_t, int, int); +extern ssize_t xfs_read(bhv_desc_t *, uio_t *, int, cred_t *); +extern ssize_t xfs_write(bhv_desc_t *, uio_t *, int, cred_t *); + +extern int xfs_dev_is_read_only(struct xfs_mount *, char *); + +#define XFS_FSB_TO_DB_IO(io,fsb) \ + (((io)->io_flags & XFS_IOCORE_RT) ? 
\ + XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \ + XFS_FSB_TO_DADDR((io)->io_mount, (fsb))) + +#endif /* __XFS_FRW_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_fs_subr.c b/sys/gnu/fs/xfs/FreeBSD/xfs_fs_subr.c new file mode 100644 index 000000000000..5a581ab5ee6e --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_fs_subr.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +/* + * Stub for no-op vnode operations that return error status. + */ +int +fs_noerr() +{ + return 0; +} + +/* + * Operation unsupported under this file system. + */ +int +fs_nosys() +{ + return ENOSYS; +} + +/* + * Stub for inactive, strategy, and read/write lock/unlock. 
Does nothing. + */ +/* ARGSUSED */ +void +fs_noval() +{ +} + +/* + * vnode pcache layer for vnode_tosspages. + * 'last' parameter unused but left in for IRIX compatibility + * + * The body is compiled only when XXXKAN is non-zero; otherwise this + * function does nothing. Of the parameters, only bdp and first are used. + */ +void +fs_tosspages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ +#if XXXKAN + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) + truncate_inode_pages(ip->i_mapping, first); +#endif +} + + +/* + * vnode pcache layer for vnode_flushinval_pages. + * 'last' parameter unused but left in for IRIX compatibility + * + * The body is compiled only when XXXKAN is non-zero; otherwise this + * function does nothing. Of the parameters, only bdp and first are used. + */ +void +fs_flushinval_pages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ +#if XXXKAN + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) { + filemap_fdatasync(ip->i_mapping); + fsync_inode_data_buffers(ip); + filemap_fdatawait(ip->i_mapping); + + truncate_inode_pages(ip->i_mapping, first); + } +#endif +} + +/* + * vnode pcache layer for vnode_flush_pages. + * 'last' parameter unused but left in for IRIX compatibility + * + * The flush calls are compiled only when XXXKAN is non-zero. Only bdp + * is used; first, last, flags and fiopt are ignored. Always returns 0. + */ +int +fs_flush_pages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + uint64_t flags, + int fiopt) +{ +#if XXXKAN + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) { + filemap_fdatasync(ip->i_mapping); + fsync_inode_data_buffers(ip); + filemap_fdatawait(ip->i_mapping); + } +#endif + return 0; +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_fs_subr.h b/sys/gnu/fs/xfs/FreeBSD/xfs_fs_subr.h new file mode 100644 index 000000000000..198b8dd7818d --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_fs_subr.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUBR_H__ +#define __XFS_SUBR_H__ + +/* + * Utilities shared among file system implementations. + */ + +struct cred; + +extern int fs_noerr(void); +extern int fs_nosys(void); +extern int fs_nodev(void); /* NOTE(review): no definition visible in xfs_fs_subr.c; confirm one exists */ +extern void fs_noval(void); +extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); + +#endif /* __XFS_SUBR_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_globals.c b/sys/gnu/fs/xfs/FreeBSD/xfs_globals.c new file mode 100644 index 000000000000..85f34fc222ce --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_globals.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * This file contains globals needed by XFS that were normally defined + * somewhere else in IRIX. + */ + +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_bmap_btree.h" +#include "xfs_bit.h" +#include "xfs_refcache.h" + +/* + * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, + * other XFS code uses these values. 
+ */ + +xfs_param_t xfs_params = { + /* MIN DFLT MAX */ +#ifdef HAVE_REFCACHE + .refcache_size = { 0, 128, XFS_REFCACHE_SIZE_MAX }, + .refcache_purge = { 0, 32, XFS_REFCACHE_SIZE_MAX }, +#endif + .restrict_chown = { 0, 1, 1 }, + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 127 }, + .error_level = { 0, 3, 11 }, + .sync_interval = { 1, 30, 60 }, + .stats_clear = { 0, 0, 1 }, + .inherit_sync = { 0, 1, 1 }, + .inherit_nodump = { 0, 1, 1 }, + .inherit_noatim = { 0, 1, 1 }, +}; + +/* + * Global system credential structure. + */ +cred_t sys_cred_val, *sys_cred = &sys_cred_val; diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_iget.c b/sys/gnu/fs/xfs/FreeBSD/xfs_iget.c new file mode 100644 index 000000000000..45485bb361a5 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_iget.c @@ -0,0 +1,973 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_utils.h" +#include "xfs_vnode.h" + + +static int xfs_vn_allocate(xfs_mount_t *, xfs_inode_t *, struct xfs_vnode **); + +/* + * Initialize the inode hash table for the newly mounted file system. + * + * mp -- this is the mount point structure for the file system being + * initialized + */ +void +xfs_ihash_init(xfs_mount_t *mp) +{ + int i; + + mp->m_ihsize = XFS_BUCKETS(mp); + mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize + * sizeof(xfs_ihash_t), KM_SLEEP); + ASSERT(mp->m_ihash != NULL); + for (i = 0; i < mp->m_ihsize; i++) { + rwlock_init(&(mp->m_ihash[i].ih_lock)); + } +} + +/* + * Free up structures allocated by xfs_ihash_init, at unmount time. + */ +void +xfs_ihash_free(xfs_mount_t *mp) +{ + int i; + + for (i = 0; i < mp->m_ihsize; i++) + rwlock_destroy(&mp->m_ihash[i].ih_lock); + kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t)); + mp->m_ihash = NULL; +} + +/* + * Initialize the inode cluster hash table for the newly mounted file system. 
+ * + * mp -- this is the mount point structure for the file system being + * initialized + */ +void +xfs_chash_init(xfs_mount_t *mp) +{ + int i; + + /* + * m_chash size is based on m_ihash + * with a minimum of 37 entries + * + * NOTE(review): the divisor below is zero whenever + * XFS_INODE_CLUSTER_SIZE(mp) is smaller than 1 << sb_inodelog; + * confirm mount-time validation rules that out. + */ + mp->m_chsize = (XFS_BUCKETS(mp)) / + (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); + if (mp->m_chsize < 37) { + mp->m_chsize = 37; + } + mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize + * sizeof(xfs_chash_t), + KM_SLEEP); + ASSERT(mp->m_chash != NULL); + + for (i = 0; i < mp->m_chsize; i++) { + spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); + } +} + +/* + * Free up structures allocated by xfs_chash_init, at unmount time. + */ +void +xfs_chash_free(xfs_mount_t *mp) +{ + int i; + + for (i = 0; i < mp->m_chsize; i++) { + spinlock_destroy(&mp->m_chash[i].ch_lock); + } + + kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t)); + mp->m_chash = NULL; +} + +/* + * Look up an inode by number in the given file system. + * The inode is looked up in the hash table for the file system + * represented by the mount point parameter mp. Each bucket of + * the hash table is guarded by an individual read/write lock (ih_lock). + * + * If the inode is found in the hash table, its corresponding vnode + * is obtained with a call to vn_get(). This call takes care of + * coordination with the reclamation of the inode and vnode. Note + * that the vmap structure is filled in while holding the hash lock. + * This gives us the state of the inode/vnode when we found it and + * is used for coordination in vn_get(). + * + * If it is not in core, read it in from the file system's device and + * add the inode into the hash table. + * + * The inode is locked according to the value of the lock_flags parameter. + * This flag parameter indicates how and if the inode's IO lock and inode lock + * should be taken. + * + * mp -- the mount point structure for the current file system. It points + * to the inode hash table. 
+ * tp -- a pointer to the current transaction if there is one. This is + * simply passed through to the xfs_iread() call. + * ino -- the number of the inode desired. This is the unique identifier + * within the file system for the inode being requested. + * lock_flags -- flags indicating how to lock the inode. See the comment + * for xfs_ilock() for a list of valid values. + * bno -- the block number starting the buffer containing the inode, + * if known (as by bulkstat), else 0. + */ +int +xfs_iget( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + uint lock_flags, + xfs_inode_t **ipp, + xfs_daddr_t bno) +{ + xfs_ihash_t *ih; + xfs_inode_t *ip; + xfs_inode_t *iq; + xfs_vnode_t *vp; + ulong version; + int error; + /* REFERENCED */ + int newnode; + xfs_chash_t *ch; + xfs_chashlist_t *chl, *chlnew; + vmap_t vmap; + SPLDECL(s); + + XFS_STATS_INC(xs_ig_attempts); + + ih = XFS_IHASH(mp, ino); + +again: + read_lock(&ih->ih_lock); + + for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { + if (ip->i_ino == ino) { + vp = XFS_ITOV(ip); + VMAP(vp, vmap); + /* + * Inode cache hit: if ip is not at the front of + * its hash chain, move it there now. + * Do this with the lock held for update, but + * do statistics after releasing the lock. + */ + if (ip->i_prevp != &ih->ih_next + && rwlock_trypromote(&ih->ih_lock)) { + + if ((iq = ip->i_next)) { + iq->i_prevp = ip->i_prevp; + } + *ip->i_prevp = iq; + iq = ih->ih_next; + iq->i_prevp = &ip->i_next; + ip->i_next = iq; + ip->i_prevp = &ih->ih_next; + ih->ih_next = ip; + write_unlock(&ih->ih_lock); + } else { + read_unlock(&ih->ih_lock); + } + + XFS_STATS_INC(xs_ig_found); + + /* + * Get a reference to the vnode/inode. + * vn_get() takes care of coordination with + * the file system inode release and reclaim + * functions. If it returns NULL, the inode + * has been reclaimed so just start the search + * over again. 
We probably won't find it, + * but we could be racing with another cpu + * looking for the same inode so we have to at + * least look. + */ + if (!(vp = vn_get(vp, &vmap))) { + XFS_STATS_INC(xs_ig_frecycle); + goto again; + } + + if (lock_flags != 0) { + ip->i_flags &= ~XFS_IRECLAIM; + xfs_ilock(ip, lock_flags); + } + + newnode = (ip->i_d.di_mode == 0); + if (newnode) { + xfs_iocore_inode_reinit(ip); + } + ip->i_flags &= ~XFS_ISTALE; + + vn_trace_exit(vp, "xfs_iget.found", + (inst_t *)__return_address); + goto return_ip; + } + } + + /* + * Inode cache miss: save the hash chain version stamp and unlock + * the chain, so we don't deadlock in vn_alloc. + */ + XFS_STATS_INC(xs_ig_missed); + + version = ih->ih_version; + + read_unlock(&ih->ih_lock); + + /* + * Read the disk inode attributes into a new inode structure and get + * a new vnode for it. This should also initialize i_ino and i_mount. + */ + error = xfs_iread(mp, tp, ino, &ip, bno); + if (error) { + return error; + } + + error = xfs_vn_allocate(mp, ip, &vp); + if (error) { + /* + * NOTE(review): ip from xfs_iread() is not released on + * this path; confirm xfs_vn_allocate() disposes of it on + * failure, otherwise the inode is leaked. + */ + return error; + } + vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); + + xfs_inode_lock_init(ip, vp); + xfs_iocore_inode_init(ip); + + if (lock_flags != 0) { + xfs_ilock(ip, lock_flags); + } + + /* + * Put ip on its hash chain, unless someone else hashed a duplicate + * after we released the hash lock. + */ + write_lock(&ih->ih_lock); + + if (ih->ih_version != version) { + for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) { + if (iq->i_ino == ino) { + write_unlock(&ih->ih_lock); + /* + * NOTE(review): ip may still hold the lock_flags + * locks taken above and vp is not released here; + * confirm xfs_idestroy() accounts for both. + */ + xfs_idestroy(ip); + + XFS_STATS_INC(xs_ig_dup); + goto again; + } + } + } + + /* + * These values _must_ be set before releasing ihlock! 
+ */ + ip->i_hash = ih; + if ((iq = ih->ih_next)) { + iq->i_prevp = &ip->i_next; + } + ip->i_next = iq; + ip->i_prevp = &ih->ih_next; + ih->ih_next = ip; + ip->i_udquot = ip->i_gdquot = NULL; + ih->ih_version++; + + write_unlock(&ih->ih_lock); + + /* + * put ip on its cluster's hash chain + */ + ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL && + ip->i_cnext == NULL); + + chlnew = NULL; + ch = XFS_CHASH(mp, ip->i_blkno); + chlredo: + s = mutex_spinlock(&ch->ch_lock); + for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { + if (chl->chl_blkno == ip->i_blkno) { + + /* insert this inode into the doubly-linked list + * where chl points */ + if ((iq = chl->chl_ip)) { + ip->i_cprev = iq->i_cprev; + iq->i_cprev->i_cnext = ip; + iq->i_cprev = ip; + ip->i_cnext = iq; + } else { + ip->i_cnext = ip; + ip->i_cprev = ip; + } + chl->chl_ip = ip; + ip->i_chash = chl; + break; + } + } + + /* no hash list found for this block; add a new hash list */ + if (chl == NULL) { + if (chlnew == NULL) { + mutex_spinunlock(&ch->ch_lock, s); + ASSERT(xfs_chashlist_zone != NULL); + chlnew = (xfs_chashlist_t *) + kmem_zone_alloc(xfs_chashlist_zone, + KM_SLEEP); + ASSERT(chlnew != NULL); + goto chlredo; + } else { + ip->i_cnext = ip; + ip->i_cprev = ip; + ip->i_chash = chlnew; + chlnew->chl_ip = ip; + chlnew->chl_blkno = ip->i_blkno; + chlnew->chl_next = ch->ch_list; + ch->ch_list = chlnew; + chlnew = NULL; + } + } else { + if (chlnew != NULL) { + kmem_zone_free(xfs_chashlist_zone, chlnew); + } + } + + mutex_spinunlock(&ch->ch_lock, s); + + + /* + * Link ip to its mount and thread it on the mount's inode list. 
+ */ + XFS_MOUNT_ILOCK(mp); + if ((iq = mp->m_inodes)) { + ASSERT(iq->i_mprev->i_mnext == iq); + ip->i_mprev = iq->i_mprev; + iq->i_mprev->i_mnext = ip; + iq->i_mprev = ip; + ip->i_mnext = iq; + } else { + ip->i_mnext = ip; + ip->i_mprev = ip; + } + mp->m_inodes = ip; + + XFS_MOUNT_IUNLOCK(mp); + + newnode = 1; + + return_ip: + ASSERT(ip->i_df.if_ext_max == + XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); + + ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == + ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); + + *ipp = ip; + + /* + * If we have a real type for an on-disk inode, we can set ops(&unlock) + * now. If it's a new inode being created, xfs_ialloc will handle it. + */ + XVFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); + + return 0; +} + +/* + * Do the setup for the various locks within the incore inode. + */ +void +xfs_inode_lock_init( + xfs_inode_t *ip, + xfs_vnode_t *vp) +{ + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", (long)vp->v_number); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number); +#if XXXKAN + init_waitqueue_head(&ip->i_ipin_wait); +#endif + atomic_set(&ip->i_pincount, 0); + init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number); +} + +/* + * Look for the inode corresponding to the given ino in the hash table. + * If it is there and its i_transp pointer matches tp, return it. + * Otherwise, return NULL. + */ +xfs_inode_t * +xfs_inode_incore(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_trans_t *tp) +{ + xfs_ihash_t *ih; + xfs_inode_t *ip; + + ih = XFS_IHASH(mp, ino); + read_lock(&ih->ih_lock); + for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { + if (ip->i_ino == ino) { + /* + * If we find it and tp matches, return it. + * Otherwise break from the loop and return + * NULL. 
+ */ + if (ip->i_transp == tp) { + read_unlock(&ih->ih_lock); + return (ip); + } + break; + } + } + read_unlock(&ih->ih_lock); + return (NULL); +} + +/* + * Decrement reference count of an inode structure and unlock it. + * + * ip -- the inode being released + * lock_flags -- this parameter indicates the inode's locks to be + * to be released. See the comment on xfs_iunlock() for a list + * of valid values. + */ +void +xfs_iput(xfs_inode_t *ip, + uint lock_flags) +{ + xfs_vnode_t *vp = XFS_ITOV(ip); + + vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address); + + xfs_iunlock(ip, lock_flags); + + VN_RELE(vp); +} + +/* + * Special iput for brand-new inodes that are still locked + */ +void +xfs_iput_new(xfs_inode_t *ip, + uint lock_flags) +{ + xfs_vnode_t *vp = XFS_ITOV(ip); + + vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); + + if (lock_flags) + xfs_iunlock(ip, lock_flags); + + VN_RELE(vp); +} + + +/* + * This routine embodies the part of the reclaim code that pulls + * the inode from the inode hash table and the mount structure's + * inode list. + * This should only be called from xfs_reclaim(). + */ +void +xfs_ireclaim(xfs_inode_t *ip) +{ + xfs_vnode_t *vp; + + /* + * Remove from old hash list and mount list. + */ + XFS_STATS_INC(xs_ig_reclaims); + + xfs_iextract(ip); + + /* + * Here we do a spurious inode lock in order to coordinate with + * xfs_sync(). This is because xfs_sync() references the inodes + * in the mount list without taking references on the corresponding + * vnodes. We make that OK here by ensuring that we wait until + * the inode is unlocked in xfs_sync() before we go ahead and + * free it. We get both the regular lock and the io lock because + * the xfs_sync() code may need to drop the regular one but will + * still hold the io lock. + */ + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + + /* + * Release dquots (and their references) if any. An inode may escape + * xfs_inactive and get here via vn_alloc->vn_reclaim path. 
+ */ + XFS_QM_DQDETACH(ip->i_mount, ip); + + /* + * Pull our behavior descriptor from the vnode chain. + */ + vp = XFS_ITOV_NULL(ip); + if (vp) { + vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); + } + + /* + * Free all memory associated with the inode. + */ + xfs_idestroy(ip); +} + +/* + * This routine removes an about-to-be-destroyed inode from + * all of the lists in which it is located with the exception + * of the behavior chain. + */ +void +xfs_iextract( + xfs_inode_t *ip) +{ + xfs_ihash_t *ih; + xfs_inode_t *iq; + xfs_mount_t *mp; + xfs_chash_t *ch; + xfs_chashlist_t *chl, *chm; + SPLDECL(s); + + ih = ip->i_hash; + write_lock(&ih->ih_lock); + if ((iq = ip->i_next)) { + iq->i_prevp = ip->i_prevp; + } + *ip->i_prevp = iq; + write_unlock(&ih->ih_lock); + + /* + * Remove from cluster hash list + * 1) delete the chashlist if this is the last inode on the chashlist + * 2) unchain from list of inodes + * 3) point chashlist->chl_ip to 'chl_next' if to this inode. + */ + mp = ip->i_mount; + ch = XFS_CHASH(mp, ip->i_blkno); + s = mutex_spinlock(&ch->ch_lock); + + if (ip->i_cnext == ip) { + /* Last inode on chashlist */ + ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); + ASSERT(ip->i_chash != NULL); + chm=NULL; + for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { + if (chl->chl_blkno == ip->i_blkno) { + if (chm == NULL) { + /* first item on the list */ + ch->ch_list = chl->chl_next; + } else { + chm->chl_next = chl->chl_next; + } + kmem_zone_free(xfs_chashlist_zone, chl); + break; + } else { + ASSERT(chl->chl_ip != ip); + chm = chl; + } + } + ASSERT_ALWAYS(chl != NULL); + } else { + /* delete one inode from a non-empty list */ + iq = ip->i_cnext; + iq->i_cprev = ip->i_cprev; + ip->i_cprev->i_cnext = iq; + if (ip->i_chash->chl_ip == ip) { + ip->i_chash->chl_ip = iq; + } + ip->i_chash = __return_address; + ip->i_cprev = __return_address; + ip->i_cnext = __return_address; + } + mutex_spinunlock(&ch->ch_lock, s); + + /* + * Remove from mount's inode list. 
+ */ + XFS_MOUNT_ILOCK(mp); + ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL)); + iq = ip->i_mnext; + iq->i_mprev = ip->i_mprev; + ip->i_mprev->i_mnext = iq; + + /* + * Fix up the head pointer if it points to the inode being deleted. + */ + if (mp->m_inodes == ip) { + if (ip == iq) { + mp->m_inodes = NULL; + } else { + mp->m_inodes = iq; + } + } + +#if XXXKAN + /* + * Not sure if while i_reclaim crap is needed on + * FreeBSD, will revisit this later. + */ + + /* Deal with the deleted inodes list */ + list_del_init(&ip->i_reclaim); +#endif + + mp->m_ireclaims++; + XFS_MOUNT_IUNLOCK(mp); +} + +/* + * This is a wrapper routine around the xfs_ilock() routine + * used to centralize some grungy code. It is used in places + * that wish to lock the inode solely for reading the extents. + * The reason these places can't just call xfs_ilock(SHARED) + * is that the inode lock also guards to bringing in of the + * extents from disk for a file in b-tree format. If the inode + * is in b-tree format, then we need to lock the inode exclusively + * until the extents are read in. Locking it exclusively all + * the time would limit our parallelism unnecessarily, though. + * What we do instead is check to see if the extents have been + * read in yet, and only lock the inode exclusively if they + * have not. + * + * The function returns a value which should be given to the + * corresponding xfs_iunlock_map_shared(). This value is + * the mode in which the lock was actually taken. + */ +uint +xfs_ilock_map_shared( + xfs_inode_t *ip) +{ + uint lock_mode; + + if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && + ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { + lock_mode = XFS_ILOCK_EXCL; + } else { + lock_mode = XFS_ILOCK_SHARED; + } + + xfs_ilock(ip, lock_mode); + + return lock_mode; +} + +/* + * This is simply the unlock routine to go with xfs_ilock_map_shared(). + * All it does is call xfs_iunlock() with the given lock_mode. 
+ */ +void +xfs_iunlock_map_shared( + xfs_inode_t *ip, + unsigned int lock_mode) +{ + xfs_iunlock(ip, lock_mode); +} + +/* + * The xfs inode contains 2 locks: a multi-reader lock called the + * i_iolock and a multi-reader lock called the i_lock. This routine + * allows either or both of the locks to be obtained. + * + * The 2 locks should always be ordered so that the IO lock is + * obtained first in order to prevent deadlock. + * + * ip -- the inode being locked + * lock_flags -- this parameter indicates the inode's locks + * to be locked. It can be: + * XFS_IOLOCK_SHARED, + * XFS_IOLOCK_EXCL, + * XFS_ILOCK_SHARED, + * XFS_ILOCK_EXCL, + * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, + * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, + * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, + * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL + */ +void +xfs_ilock(xfs_inode_t *ip, + uint lock_flags) +{ + /* + * You can't set both SHARED and EXCL for the same lock, + * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, + * and XFS_ILOCK_EXCL are valid values to set in lock_flags. + */ + ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != + (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); + + if (lock_flags & XFS_IOLOCK_EXCL) { + mrupdate(&ip->i_iolock); + } else if (lock_flags & XFS_IOLOCK_SHARED) { + mraccess(&ip->i_iolock); + } + if (lock_flags & XFS_ILOCK_EXCL) { + mrupdate(&ip->i_lock); + } else if (lock_flags & XFS_ILOCK_SHARED) { + mraccess(&ip->i_lock); + } + xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); +} + +/* + * This is just like xfs_ilock(), except that the caller + * is guaranteed not to sleep. It returns 1 if it gets + * the requested locks and 0 otherwise. If the IO lock is + * obtained but the inode lock cannot be, then the IO lock + * is dropped before returning. 
+ * + * ip -- the inode being locked + * lock_flags -- this parameter indicates the inode's locks to be + * to be locked. See the comment for xfs_ilock() for a list + * of valid values. + * + */ +int +xfs_ilock_nowait(xfs_inode_t *ip, + uint lock_flags) +{ + int iolocked; + int ilocked; + + /* + * You can't set both SHARED and EXCL for the same lock, + * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, + * and XFS_ILOCK_EXCL are valid values to set in lock_flags. + */ + ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != + (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); + + iolocked = 0; + if (lock_flags & XFS_IOLOCK_EXCL) { + iolocked = mrtryupdate(&ip->i_iolock); + if (!iolocked) { + return 0; + } + } else if (lock_flags & XFS_IOLOCK_SHARED) { + iolocked = mrtryaccess(&ip->i_iolock); + if (!iolocked) { + return 0; + } + } + if (lock_flags & XFS_ILOCK_EXCL) { + ilocked = mrtryupdate(&ip->i_lock); + if (!ilocked) { + if (iolocked) { + mrunlock(&ip->i_iolock); + } + return 0; + } + } else if (lock_flags & XFS_ILOCK_SHARED) { + ilocked = mrtryaccess(&ip->i_lock); + if (!ilocked) { + if (iolocked) { + mrunlock(&ip->i_iolock); + } + return 0; + } + } + xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); + return 1; +} + +/* + * xfs_iunlock() is used to drop the inode locks acquired with + * xfs_ilock() and xfs_ilock_nowait(). The caller must pass + * in the flags given to xfs_ilock() or xfs_ilock_nowait() so + * that we know which locks to drop. + * + * ip -- the inode being unlocked + * lock_flags -- this parameter indicates the inode's locks to be + * to be unlocked. See the comment for xfs_ilock() for a list + * of valid values for this parameter. 
+ * + */ +void +xfs_iunlock(xfs_inode_t *ip, + uint lock_flags) +{ + /* + * You can't set both SHARED and EXCL for the same lock, + * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, + * and XFS_ILOCK_EXCL are valid values to set in lock_flags. + */ + ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != + (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0); + ASSERT(lock_flags != 0); + + if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { + ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || + (ismrlocked(&ip->i_iolock, MR_ACCESS))); + ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || + (ismrlocked(&ip->i_iolock, MR_UPDATE))); + mrunlock(&ip->i_iolock); + } + + if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { + ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || + (ismrlocked(&ip->i_lock, MR_ACCESS))); + ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || + (ismrlocked(&ip->i_lock, MR_UPDATE))); + mrunlock(&ip->i_lock); + + /* + * Let the AIL know that this item has been unlocked in case + * it is in the AIL and anyone is waiting on it. Don't do + * this if the caller has asked us not to. + */ + if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && + ip->i_itemp != NULL) { + xfs_trans_unlocked_item(ip->i_mount, + (xfs_log_item_t*)(ip->i_itemp)); + } + } + xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); +} + +/* + * give up write locks. the i/o lock cannot be held nested + * if it is being demoted. 
+ */ +void +xfs_ilock_demote(xfs_inode_t *ip, + uint lock_flags) +{ + ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); + ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); + + if (lock_flags & XFS_ILOCK_EXCL) { + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + mrdemote(&ip->i_lock); + } + if (lock_flags & XFS_IOLOCK_EXCL) { + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); + mrdemote(&ip->i_iolock); + } +} + +/* + * The following three routines simply manage the i_flock + * semaphore embedded in the inode. This semaphore synchronizes + * processes attempting to flush the in-core inode back to disk. + */ +void +xfs_iflock(xfs_inode_t *ip) +{ + psema(&(ip->i_flock), PINOD|PLTWAIT); +} + +int +xfs_iflock_nowait(xfs_inode_t *ip) +{ + return (cpsema(&(ip->i_flock))); +} + +void +xfs_ifunlock(xfs_inode_t *ip) +{ + ASSERT(valusema(&(ip->i_flock)) <= 0); + vsema(&(ip->i_flock)); +} + +extern struct vop_vector xfs_vnops; + +static int +xfs_vn_allocate(xfs_mount_t *mp, xfs_inode_t *ip, struct xfs_vnode **vpp) +{ + struct vnode *vp; + struct xfs_vnode *vdata; + int error; + + /* Use zone allocator here? 
*/ + vdata = kmem_zalloc(sizeof(*vdata), KM_SLEEP); + + error = getnewvnode("xfs", XVFSTOMNT(XFS_MTOVFS(mp)), + &xfs_vnops, &vp); + if (error) { + kmem_free(vdata, sizeof(*vdata)); + return (error); + } + + vp->v_vnlock->lk_flags |= LK_CANRECURSE; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); + + vp->v_data = (void *)vdata; + vdata->v_number= 0; + vdata->v_inode = ip; + vdata->v_vfsp = XFS_MTOVFS(mp); + vdata->v_vnode = vp; + vdata->v_type = vp->v_type = VNON; + + vn_bhv_head_init(VN_BHV_HEAD(vdata), "vnode"); + +#ifdef CONFIG_XFS_VNODE_TRACING + vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); +#endif /* CONFIG_XFS_VNODE_TRACING */ + + vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address); + + if (error == 0) + *vpp = vdata; + + return (error); +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_ioctl.c b/sys/gnu/fs/xfs/FreeBSD/xfs_ioctl.c new file mode 100644 index 000000000000..40c3159a6ccd --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_ioctl.c @@ -0,0 +1,1244 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_dfrag.h" +#include "xfs_fsops.h" + + +#if XXXKAN +/* + * ioctl commands that are used by Linux filesystems + */ +#define XFS_IOC_GETXFLAGS _IOR('f', 1, long) +#define XFS_IOC_SETXFLAGS _IOW('f', 2, long) +#define XFS_IOC_GETVERSION _IOR('v', 1, long) + + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. 
+ * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +STATIC int +xfs_find_handle( + unsigned int cmd, + unsigned long arg) +{ + int hsize; + xfs_handle_t handle; + xfs_fsop_handlereq_t hreq; + struct xfs_vnode *vp; + struct thread *td = curthread; + + if (copy_from_user(&hreq, (xfs_fsop_handlereq_t *)arg, sizeof(hreq))) + return XFS_ERROR(EFAULT); + + memset((char *)&handle, 0, sizeof(handle)); + + switch (cmd) { + case XFS_IOC_PATH_TO_FSHANDLE: + case XFS_IOC_PATH_TO_HANDLE: { + struct nameidata nd; + int error; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, + UIO_USERSPACE, hreq.path, td); + error = namei(&nd); + if (error) + return error; + NDFREE(&nd, NDF_ONLY_PNBUF); + break; + } + + case XFS_IOC_FD_TO_HANDLE: { + struct file *file; + + error = getvnode(td->td_proc->p_fd, hreq.fd, &file); + if (error) + return error; + + error = vget(vp, LK_EXCLUSIVE, td); + if (error) { + fdrop(file); + return error; + } + fdrop(file); + break; + } + + default: + ASSERT(0); + return XFS_ERROR(EINVAL); + } + + if (inode->i_sb->s_magic != XFS_SB_MAGIC) { + /* we're not in XFS anymore, Toto */ + iput(inode); + return XFS_ERROR(EINVAL); + } + + /* we need the vnode */ + vp = LINVFS_GET_VP(inode); + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + iput(inode); + return XFS_ERROR(EBADF); + } + + /* now we can grab the fsid */ + memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); + hsize = sizeof(xfs_fsid_t); + + if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { + xfs_inode_t *ip; + bhv_desc_t *bhv; + int lock_mode; + + /* need to get access to the xfs_inode to read the generation */ + bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); + ASSERT(bhv); + ip = XFS_BHVTOI(bhv); + ASSERT(ip); + lock_mode = xfs_ilock_map_shared(ip); + + /* fill in 
fid section of handle from inode */ + handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.xfs_fid_len); + handle.ha_fid.xfs_fid_pad = 0; + handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen; + handle.ha_fid.xfs_fid_ino = ip->i_ino; + + xfs_iunlock_map_shared(ip, lock_mode); + + hsize = XFS_HSIZE(handle); + } + + /* now copy our handle into the user buffer & write out the size */ + if (copy_to_user((xfs_handle_t *)hreq.ohandle, &handle, hsize) || + copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { + iput(inode); + return -XFS_ERROR(EFAULT); + } + + iput(inode); + return 0; +} + + +/* + * Convert userspace handle data into vnode (and inode). + * We [ab]use the fact that all the fsop_handlereq ioctl calls + * have a data structure argument whose first component is always + * a xfs_fsop_handlereq_t, so we can cast to and from this type. + * This allows us to optimise the copy_from_user calls and gives + * a handy, shared routine. + * + * If no error, caller must always VN_RELE the returned vp. + */ +STATIC int +xfs_vget_fsop_handlereq( + xfs_mount_t *mp, + struct inode *parinode, /* parent inode pointer */ + int cap, /* capability level for op */ + unsigned long arg, /* userspace data pointer */ + unsigned long size, /* size of expected struct */ + /* output arguments */ + xfs_fsop_handlereq_t *hreq, + vnode_t **vp, + struct inode **inode) +{ + void *hanp; + size_t hlen; + xfs_fid_t *xfid; + xfs_handle_t *handlep; + xfs_handle_t handle; + xfs_inode_t *ip; + struct inode *inodep; + vnode_t *vpp; + xfs_ino_t ino; + __u32 igen; + int error; + + if (!capable(cap)) + return XFS_ERROR(EPERM); + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parinode->i_mode)) + return XFS_ERROR(ENOTDIR); + + /* + * Copy the handle down from the user and validate + * that it looks to be in the correct format. 
+ */ + if (copy_from_user(hreq, (struct xfs_fsop_handlereq *)arg, size)) + return XFS_ERROR(EFAULT); + + hanp = hreq->ihandle; + hlen = hreq->ihandlen; + handlep = &handle; + + if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) + return XFS_ERROR(EINVAL); + if (copy_from_user(handlep, hanp, hlen)) + return XFS_ERROR(EFAULT); + if (hlen < sizeof(*handlep)) + memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); + if (hlen > sizeof(handlep->ha_fsid)) { + if (handlep->ha_fid.xfs_fid_len != + (hlen - sizeof(handlep->ha_fsid) + - sizeof(handlep->ha_fid.xfs_fid_len)) + || handlep->ha_fid.xfs_fid_pad) + return XFS_ERROR(EINVAL); + } + + /* + * Crack the handle, obtain the inode # & generation # + */ + xfid = (struct xfs_fid *)&handlep->ha_fid; + if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) { + ino = xfid->xfs_fid_ino; + igen = xfid->xfs_fid_gen; + } else { + return XFS_ERROR(EINVAL); + } + + /* + * Get the XFS inode, building a vnode to go with it. + */ + error = xfs_iget(mp, NULL, ino, XFS_ILOCK_SHARED, &ip, 0); + if (error) + return error; + if (ip == NULL) + return XFS_ERROR(EIO); + if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return XFS_ERROR(ENOENT); + } + + vpp = XFS_ITOV(ip); + inodep = LINVFS_GET_IP(vpp); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + *vp = vpp; + *inode = inodep; + return 0; +} + +STATIC int +xfs_open_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + int new_fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + vnode_t *vp; + xfs_fsop_handlereq_t hreq; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_handlereq_t), + &hreq, &vp, &inode); + if (error) + return -error; + + /* Restrict xfs_open_by_handle to directories & regular files. 
*/ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + iput(inode); + return -XFS_ERROR(EINVAL); + } + +#if BITS_PER_LONG != 32 + hreq.oflags |= O_LARGEFILE; +#endif + /* Put open permission in namei format. */ + permflag = hreq.oflags; + if ((permflag+1) & O_ACCMODE) + permflag++; + if (permflag & O_TRUNC) + permflag |= 2; + + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (permflag & FMODE_WRITE) && IS_APPEND(inode)) { + iput(inode); + return -XFS_ERROR(EPERM); + } + + if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + iput(inode); + return -XFS_ERROR(EACCES); + } + + /* Can't write directories. */ + if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + iput(inode); + return -XFS_ERROR(EISDIR); + } + + if ((new_fd = get_unused_fd()) < 0) { + iput(inode); + return new_fd; + } + + dentry = d_alloc_anon(inode); + if (dentry == NULL) { + iput(inode); + put_unused_fd(new_fd); + return -XFS_ERROR(ENOMEM); + } + + /* Ensure umount returns EBUSY on umounts while this file is open. */ + mntget(parfilp->f_vfsmnt); + + /* Create file pointer. */ + filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags); + if (IS_ERR(filp)) { + put_unused_fd(new_fd); + return -XFS_ERROR(-PTR_ERR(filp)); + } + if (inode->i_mode & S_IFREG) + filp->f_op = &linvfs_invis_file_operations; + + fd_install(new_fd, filp); + return new_fd; +} + +STATIC int +xfs_readlink_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + struct iovec aiov; + struct uio auio; + struct inode *inode; + xfs_fsop_handlereq_t hreq; + vnode_t *vp; + __u32 olen; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_handlereq_t), + &hreq, &vp, &inode); + if (error) + return -error; + + /* Restrict this handle operation to symlinks only. 
*/ + if (vp->v_type != VLNK) { + VN_RELE(vp); + return -XFS_ERROR(EINVAL); + } + + if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + aiov.iov_len = olen; + aiov.iov_base = hreq.ohandle; + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_resid = olen; + + VOP_READLINK(vp, &auio, IO_INVIS, NULL, error); + + VN_RELE(vp); + return (olen - auio.uio_resid); +} + +STATIC int +xfs_fssetdm_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + struct fsdmidata fsd; + xfs_fsop_setdm_handlereq_t dmhreq; + struct inode *inode; + bhv_desc_t *bdp; + vnode_t *vp; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_MKNOD, arg, + sizeof(xfs_fsop_setdm_handlereq_t), + (xfs_fsop_handlereq_t *)&dmhreq, + &vp, &inode); + if (error) + return -error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + VN_RELE(vp); + return -XFS_ERROR(EPERM); + } + + if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + + bdp = bhv_base_unlocked(VN_BHV_HEAD(vp)); + error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL); + + VN_RELE(vp); + if (error) + return -error; + return 0; +} + +STATIC int +xfs_attrlist_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + attrlist_cursor_kern_t *cursor; + xfs_fsop_attrlist_handlereq_t al_hreq; + struct inode *inode; + vnode_t *vp; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_attrlist_handlereq_t), + (xfs_fsop_handlereq_t *)&al_hreq, + &vp, &inode); + if (error) + return -error; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + VOP_ATTR_LIST(vp, al_hreq.buffer, al_hreq.buflen, al_hreq.flags, + cursor, NULL, error); + VN_RELE(vp); + if (error) + return -error; + return 0; +} + +STATIC int 
+xfs_attrmulti_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct inode *inode; + vnode_t *vp; + int i, size; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_attrmulti_handlereq_t), + (xfs_fsop_handlereq_t *)&am_hreq, + &vp, &inode); + if (error) + return -error; + + size = am_hreq.opcount * sizeof(attr_multiop_t); + ops = (xfs_attr_multiop_t *)kmalloc(size, GFP_KERNEL); + if (!ops) { + VN_RELE(vp); + return -XFS_ERROR(ENOMEM); + } + + if (copy_from_user(ops, am_hreq.ops, size)) { + kfree(ops); + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + + for (i = 0; i < am_hreq.opcount; i++) { + switch(ops[i].am_opcode) { + case ATTR_OP_GET: + VOP_ATTR_GET(vp,ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags, + NULL, ops[i].am_error); + break; + case ATTR_OP_SET: + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + ops[i].am_error = EPERM; + break; + } + VOP_ATTR_SET(vp,ops[i].am_attrname, ops[i].am_attrvalue, + ops[i].am_length, ops[i].am_flags, + NULL, ops[i].am_error); + break; + case ATTR_OP_REMOVE: + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + ops[i].am_error = EPERM; + break; + } + VOP_ATTR_REMOVE(vp, ops[i].am_attrname, ops[i].am_flags, + NULL, ops[i].am_error); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = -XFS_ERROR(EFAULT); + + kfree(ops); + VN_RELE(vp); + return error; +} + +/* prototypes for a few of the stack-hungry cases that have + * their own functions. 
Functions are defined after their use + * so gcc doesn't get fancy and inline them with -03 */ + +STATIC int +xfs_ioc_space( + bhv_desc_t *bdp, + vnode_t *vp, + struct file *filp, + int flags, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + unsigned long arg); + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + unsigned long arg); + +STATIC int +xfs_ioc_xattr( + vnode_t *vp, + xfs_inode_t *ip, + struct file *filp, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_getbmap( + bhv_desc_t *bdp, + struct file *filp, + int flags, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_getbmapx( + bhv_desc_t *bdp, + unsigned long arg); + +int +xfs_ioctl( + bhv_desc_t *bdp, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + int error; + vnode_t *vp; + xfs_inode_t *ip; + xfs_mount_t *mp; + + vp = LINVFS_GET_VP(inode); + + vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address); + + ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; + + switch (cmd) { + + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); + + case XFS_IOC_DIOINFO: { + struct dioattr da; + + da.d_miniosz = mp->m_sb.sb_blocksize; + da.d_mem = mp->m_sb.sb_blocksize; + + /* + * this only really needs to be BBSIZE. + * it is set to the file system block size to + * avoid having to do block zeroing on short writes. 
+ */ + da.d_maxiosz = XFS_FSB_TO_B(mp, + XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10)); + + if (copy_to_user((struct dioattr *)arg, &da, sizeof(da))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSBULKSTAT_SINGLE: + case XFS_IOC_FSBULKSTAT: + case XFS_IOC_FSINUMBERS: + return xfs_ioc_bulkstat(mp, cmd, arg); + + case XFS_IOC_FSGEOMETRY_V1: + return xfs_ioc_fsgeometry_v1(mp, arg); + + case XFS_IOC_FSGEOMETRY: + return xfs_ioc_fsgeometry(mp, arg); + + case XFS_IOC_GETVERSION: + case XFS_IOC_GETXFLAGS: + case XFS_IOC_SETXFLAGS: + case XFS_IOC_FSGETXATTR: + case XFS_IOC_FSSETXATTR: + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_xattr(vp, ip, filp, cmd, arg); + + case XFS_IOC_FSSETDM: { + struct fsdmidata dmi; + + if (copy_from_user(&dmi, (struct fsdmidata *)arg, sizeof(dmi))) + return -XFS_ERROR(EFAULT); + + error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate, + NULL); + return -error; + } + + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); + + case XFS_IOC_GETBMAPX: + return xfs_ioc_getbmapx(bdp, arg); + + case XFS_IOC_FD_TO_HANDLE: + case XFS_IOC_PATH_TO_HANDLE: + case XFS_IOC_PATH_TO_FSHANDLE: + return xfs_find_handle(cmd, arg); + + case XFS_IOC_OPEN_BY_HANDLE: + return xfs_open_by_handle(mp, arg, filp, inode); + + case XFS_IOC_FSSETDM_BY_HANDLE: + return xfs_fssetdm_by_handle(mp, arg, filp, inode); + + case XFS_IOC_READLINK_BY_HANDLE: + return xfs_readlink_by_handle(mp, arg, filp, inode); + + case XFS_IOC_ATTRLIST_BY_HANDLE: + return xfs_attrlist_by_handle(mp, arg, filp, inode); + + case XFS_IOC_ATTRMULTI_BY_HANDLE: + return xfs_attrmulti_by_handle(mp, arg, filp, inode); + + case XFS_IOC_SWAPEXT: { + error = xfs_swapext((struct xfs_swapext *)arg); + return -error; + } + + case XFS_IOC_FSCOUNTS: { + xfs_fsop_counts_t out; + + error = xfs_fs_counts(mp, &out); + if (error) + return -error; + + if (copy_to_user((char *)arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + return 0; + } + + 
case XFS_IOC_SET_RESBLKS: { + xfs_fsop_resblks_t inout; + __uint64_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&inout, (char *)arg, sizeof(inout))) + return -XFS_ERROR(EFAULT); + + /* input parameter is passed in resblks field of structure */ + in = inout.resblks; + error = xfs_reserve_blocks(mp, &in, &inout); + if (error) + return -error; + + if (copy_to_user((char *)arg, &inout, sizeof(inout))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GET_RESBLKS: { + xfs_fsop_resblks_t out; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_reserve_blocks(mp, NULL, &out); + if (error) + return -error; + + if (copy_to_user((char *)arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + + return 0; + } + + case XFS_IOC_FSGROWFSDATA: { + xfs_growfs_data_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_data(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSLOG: { + xfs_growfs_log_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_log(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSRT: { + xfs_growfs_rt_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_rt(mp, &in); + return -error; + } + + case XFS_IOC_FREEZE: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + xfs_fs_freeze(mp); + return 0; + + case XFS_IOC_THAW: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + xfs_fs_thaw(mp); + return 0; + + case XFS_IOC_GOINGDOWN: { + __uint32_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__uint32_t *)arg)) + return -XFS_ERROR(EFAULT); + + error = xfs_fs_goingdown(mp, in); + return -error; + } + + case XFS_IOC_ERROR_INJECTION: { + xfs_error_injection_t 
in; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_errortag_add(in.errtag, mp); + return -error; + } + + case XFS_IOC_ERROR_CLEARALL: + error = xfs_errortag_clearall(mp); + return -error; + + default: + return -ENOTTY; + } +} + +STATIC int +xfs_ioc_space( + bhv_desc_t *bdp, + vnode_t *vp, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + xfs_flock64_t bf; + int attr_flags = 0; + int error; + + if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) + return -XFS_ERROR(EPERM); + + if (filp->f_flags & O_RDONLY) + return -XFS_ERROR(EBADF); + + if (vp->v_type != VREG) + return -XFS_ERROR(EINVAL); + + if (copy_from_user(&bf, (xfs_flock64_t *)arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + if (ioflags & IO_INVIS) + attr_flags |= ATTR_DMI; + + error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos, + NULL, attr_flags); + return -error; +} + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + unsigned long arg) +{ + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (copy_from_user(&bulkreq, (xfs_fsop_bulkreq_t *)arg, + sizeof(xfs_fsop_bulkreq_t))) + return -XFS_ERROR(EFAULT); + + if (copy_from_user(&inlast, (__s64 *)bulkreq.lastip, + sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS) + error = xfs_inumbers(mp, NULL, &inlast, &count, + bulkreq.ubuffer); + else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + else { 
/* XFS_IOC_FSBULKSTAT */ + if (count == 1 && inlast != 0) { + inlast++; + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + } else { + error = xfs_bulkstat(mp, NULL, &inlast, &count, + (bulkstat_one_pf)xfs_bulkstat_one, NULL, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + BULKSTAT_FG_QUICK, &done); + } + } + + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user((xfs_ino_t *)bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user((__s32 *)bulkreq.ocount, &count, + sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + unsigned long arg) +{ + xfs_fsop_geom_v1_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); + if (error) + return -error; + + if (copy_to_user((xfs_fsop_geom_t *)arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + unsigned long arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 4); + if (error) + return -error; + + if (copy_to_user((xfs_fsop_geom_t *)arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* + * Linux extended inode flags interface. 
+ */ +#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */ +#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */ +#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ +#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ +#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ + +STATIC unsigned int +xfs_merge_ioc_xflags( + unsigned int flags, + unsigned int start) +{ + unsigned int xflags = start; + + if (flags & LINUX_XFLAG_IMMUTABLE) + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; + if (flags & LINUX_XFLAG_APPEND) + xflags |= XFS_XFLAG_APPEND; + else + xflags &= ~XFS_XFLAG_APPEND; + if (flags & LINUX_XFLAG_SYNC) + xflags |= XFS_XFLAG_SYNC; + else + xflags &= ~XFS_XFLAG_SYNC; + if (flags & LINUX_XFLAG_NOATIME) + xflags |= XFS_XFLAG_NOATIME; + else + xflags &= ~XFS_XFLAG_NOATIME; + if (flags & LINUX_XFLAG_NODUMP) + xflags |= XFS_XFLAG_NODUMP; + else + xflags &= ~XFS_XFLAG_NODUMP; + + return xflags; +} + +STATIC int +xfs_ioc_xattr( + vnode_t *vp, + xfs_inode_t *ip, + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + struct fsxattr fa; + vattr_t va; + int error; + int attr_flags; + unsigned int flags; + + switch (cmd) { + case XFS_IOC_FSGETXATTR: { + va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return -error; + + fa.fsx_xflags = va.va_xflags; + fa.fsx_extsize = va.va_extsize; + fa.fsx_nextents = va.va_nextents; + + if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSSETXATTR: { + if (copy_from_user(&fa, (struct fsxattr *)arg, sizeof(fa))) + return -XFS_ERROR(EFAULT); + + attr_flags = 0; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; + va.va_xflags = fa.fsx_xflags; + va.va_extsize = fa.fsx_extsize; + + VOP_SETATTR(vp, &va, attr_flags, NULL, error); + if 
(!error) + vn_revalidate(vp); /* update Linux inode flags */ + return -error; + } + + case XFS_IOC_FSGETXATTRA: { + va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return -error; + + fa.fsx_xflags = va.va_xflags; + fa.fsx_extsize = va.va_extsize; + fa.fsx_nextents = va.va_anextents; + + if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GETXFLAGS: { + flags = 0; + if (ip->i_d.di_flags & XFS_XFLAG_IMMUTABLE) + flags |= LINUX_XFLAG_IMMUTABLE; + if (ip->i_d.di_flags & XFS_XFLAG_APPEND) + flags |= LINUX_XFLAG_APPEND; + if (ip->i_d.di_flags & XFS_XFLAG_SYNC) + flags |= LINUX_XFLAG_SYNC; + if (ip->i_d.di_flags & XFS_XFLAG_NOATIME) + flags |= LINUX_XFLAG_NOATIME; + if (ip->i_d.di_flags & XFS_XFLAG_NODUMP) + flags |= LINUX_XFLAG_NODUMP; + if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SETXFLAGS: { + if (copy_from_user(&flags, (unsigned int *)arg, sizeof(flags))) + return -XFS_ERROR(EFAULT); + + if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \ + LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \ + LINUX_XFLAG_SYNC)) + return -XFS_ERROR(EOPNOTSUPP); + + attr_flags = 0; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + + va.va_mask = XFS_AT_XFLAGS; + va.va_xflags = xfs_merge_ioc_xflags(flags, ip->i_d.di_flags); + + VOP_SETATTR(vp, &va, attr_flags, NULL, error); + if (!error) + vn_revalidate(vp); /* update Linux inode flags */ + return -error; + } + + case XFS_IOC_GETVERSION: { + flags = LINVFS_GET_IP(vp)->i_generation; + if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags))) + return -XFS_ERROR(EFAULT); + return 0; + } + + default: + return -ENOTTY; + } +} + +STATIC int +xfs_ioc_getbmap( + bhv_desc_t *bdp, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + struct getbmap bm; + int iflags; + int 
error; + + if (copy_from_user(&bm, (struct getbmap *)arg, sizeof(bm))) + return -XFS_ERROR(EFAULT); + + if (bm.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + if (ioflags & IO_INVIS) + iflags |= BMV_IF_NO_DMAPI_READ; + + error = xfs_getbmap(bdp, &bm, (struct getbmap *)arg+1, iflags); + if (error) + return -error; + + if (copy_to_user((struct getbmap *)arg, &bm, sizeof(bm))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_getbmapx( + bhv_desc_t *bdp, + unsigned long arg) +{ + struct getbmapx bmx; + struct getbmap bm; + int iflags; + int error; + + if (copy_from_user(&bmx, (struct getbmapx *)arg, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + /* + * Map input getbmapx structure to a getbmap + * structure for xfs_getbmap. + */ + GETBMAP_CONVERT(bmx, bm); + + iflags = bmx.bmv_iflags; + + if (iflags & (~BMV_IF_VALID)) + return -XFS_ERROR(EINVAL); + + iflags |= BMV_IF_EXTENDED; + + error = xfs_getbmap(bdp, &bm, (struct getbmapx *)arg+1, iflags); + if (error) + return -error; + + GETBMAP_CONVERT(bm, bmx); + + if (copy_to_user((struct getbmapx *)arg, &bmx, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + return 0; +} + +#endif + +int +xfs_ioctl( + bhv_desc_t *bdp, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + return EINVAL; +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_iops.h b/sys/gnu/fs/xfs/FreeBSD/xfs_iops.h new file mode 100644 index 000000000000..20549a793ce4 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_iops.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_IOPS_H__ +#define __XFS_IOPS_H__ + +/* + * Extended system attributes. + * So far only POSIX ACLs are supported, but this will need to + * grow in time (capabilities, mandatory access control, etc). 
+ */ +#define XFS_SYSTEM_NAMESPACE SYSTEM_POSIXACL + +/* + * Define a table of the namespaces XFS supports + */ +typedef int (*xattr_exists_t)(xfs_vnode_t *); + +typedef struct xattr_namespace { + char *name; + unsigned int namelen; + xattr_exists_t exists; +} xattr_namespace_t; + +#define SYSTEM_NAMES 0 +#define ROOT_NAMES 1 +#define USER_NAMES 2 +extern struct xattr_namespace *xfs_namespaces; + +extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, + int, unsigned int, unsigned long); +#endif /* __XFS_IOPS_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c b/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c new file mode 100644 index 000000000000..78a091fa4003 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c @@ -0,0 +1,459 @@ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> + +#include <geom/geom.h> +#include <geom/geom_vfs.h> + +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_bmap.h" +#include "xfs_error.h" +#include "xfs_bit.h" +#include "xfs_rw.h" +#include "xfs_quota.h" +#include "xfs_fsops.h" +#include "xfs_clnt.h" + +#include <xfs_mountops.h> + +MALLOC_DEFINE(M_XFSNODE, "XFS node", "XFS vnode private part"); + +static vfs_mount_t _xfs_mount; +static vfs_unmount_t _xfs_unmount; +static vfs_root_t _xfs_root; +static vfs_quotactl_t _xfs_quotactl; +static 
vfs_statfs_t _xfs_statfs; +static vfs_sync_t _xfs_sync; +static vfs_vget_t _xfs_vget; +static vfs_fhtovp_t _xfs_fhtovp; +static vfs_checkexp_t _xfs_checkexp; +static vfs_vptofh_t _xfs_vptofh; +static vfs_init_t _xfs_init; +static vfs_uninit_t _xfs_uninit; +static vfs_extattrctl_t _xfs_extattrctl; + +static b_strategy_t xfs_geom_strategy; + +static const char *xfs_opts[] = + { "from", "flags", "logbufs", "logbufsize", + "rtname", "logname", "iosizelog", "sunit", + "swidth", + NULL }; + +static void +parse_int(struct mount *mp, const char *opt, int *val, int *error) +{ + char *tmp, *ep; + + tmp = vfs_getopts(mp->mnt_optnew, opt, error); + if (*error != 0) { + return; + } + if (tmp != NULL) { + *val = (int)strtol(tmp, &ep, 10); + if (*ep) { + *error = EINVAL; + return; + } + } +} + +static int +_xfs_param_copyin(struct mount *mp, struct thread *td) +{ + struct xfsmount *xmp = MNTTOXFS(mp); + struct xfs_mount_args *args = &xmp->m_args; + char *path; + char *fsname; + char *rtname; + char *logname; + int error; + + path = vfs_getopts(mp->mnt_optnew, "fspath", &error); + if (error) + return (error); + + bzero(args, sizeof(struct xfs_mount_args)); + args->logbufs = -1; + args->logbufsize = -1; + + parse_int(mp, "flags", &args->flags, &error); + if (error != 0) + return error; + + args->flags |= XFSMNT_32BITINODES; + + parse_int(mp, "sunit", &args->sunit, &error); + if (error != 0) + return error; + + parse_int(mp, "swidth", &args->swidth, &error); + if (error != 0) + return error; + + parse_int(mp, "logbufs", &args->logbufs, &error); + if (error != 0) + return error; + + parse_int(mp, "logbufsize", &args->logbufsize, &error); + if (error != 0) + return error; + + fsname = vfs_getopts(mp->mnt_optnew, "from", &error); + if (error == 0 && fsname != NULL) { + strncpy(args->fsname, fsname, sizeof(args->fsname) - 1); + } + + logname = vfs_getopts(mp->mnt_optnew, "logname", &error); + if (error == 0 && logname != NULL) { + strncpy(args->logname, logname, sizeof(args->logname) - 
1); + } + + rtname = vfs_getopts(mp->mnt_optnew, "rtname", &error); + if (error == 0 && rtname != NULL) { + strncpy(args->rtname, rtname, sizeof(args->rtname) - 1); + } + + strncpy(args->mtpt, path, sizeof(args->mtpt)); + + printf("fsname '%s' logname '%s' rtname '%s'\n" + "flags 0x%x sunit %d swidth %d logbufs %d logbufsize %d\n", + args->fsname, args->logname, args->rtname, args->flags, + args->sunit, args->swidth, args->logbufs, args->logbufsize); + + vfs_mountedfrom(mp, args->fsname); + + return (0); +} + +static int +_xfs_mount(struct mount *mp, + struct thread *td) +{ + struct xfsmount *xmp; + struct xfs_vnode *rootvp; + struct ucred *curcred; + struct vnode *rvp; + struct cdev *ddev; + int error; + + if (vfs_filteropt(mp->mnt_optnew, xfs_opts)) + return (EINVAL); + + xmp = xfsmount_allocate(mp); + if (xmp == NULL) + return (ENOMEM); + + if((error = _xfs_param_copyin(mp, td)) != 0) + goto fail; + + /* Force read-only mounts in this branch. */ + XFSTOVFS(xmp)->vfs_flag |= VFS_RDONLY; + mp->mnt_flag |= MNT_RDONLY; + + /* XXX: Do not support MNT_UPDATE yet */ + if (mp->mnt_flag & MNT_UPDATE) + return EOPNOTSUPP; + + curcred = td->td_ucred; + XVFS_MOUNT(XFSTOVFS(xmp), &xmp->m_args, curcred, error); + if (error) + goto fail; + + XVFS_ROOT(XFSTOVFS(xmp), &rootvp, error); + if (error) + goto fail_unmount; + + ddev = XFS_VFSTOM(XFSTOVFS(xmp))->m_dev; + if (ddev->si_iosize_max != 0) + mp->mnt_iosize_max = ddev->si_iosize_max; + if (mp->mnt_iosize_max > MAXPHYS) + mp->mnt_iosize_max = MAXPHYS; + + mp->mnt_flag |= MNT_LOCAL | MNT_RDONLY; + mp->mnt_stat.f_fsid.val[0] = dev2udev(ddev); + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; + + VFS_STATFS(mp, &mp->mnt_stat, td); + if (error) + goto fail_unmount; + + rvp = rootvp->v_vnode; + rvp->v_vflag |= VV_ROOT; + VN_RELE(rootvp); + + return (0); + + fail_unmount: + XVFS_UNMOUNT(XFSTOVFS(xmp), 0, curcred, error); + + fail: + if (xmp != NULL) + xfsmount_deallocate(xmp); + + return (error); +} + +/* + * Free reference 
to null layer + */ +static int +_xfs_unmount(mp, mntflags, td) + struct mount *mp; + int mntflags; + struct thread *td; +{ + int error; + + XVFS_UNMOUNT(MNTTOVFS(mp), 0, td->td_ucred, error); + return (error); +} + +static int +_xfs_root(mp, flags, vpp, td) + struct mount *mp; + int flags; + struct vnode **vpp; + struct thread *td; +{ + xfs_vnode_t *vp; + int error; + + XVFS_ROOT(MNTTOVFS(mp), &vp, error); + if (error == 0) { + *vpp = vp->v_vnode; + VOP_LOCK(*vpp, flags, curthread); + } + return (error); +} + +static int +_xfs_quotactl(mp, cmd, uid, arg, td) + struct mount *mp; + int cmd; + uid_t uid; + caddr_t arg; + struct thread *td; +{ + printf("xfs_quotactl\n"); + return ENOSYS; +} + +static int +_xfs_statfs(mp, sbp, td) + struct mount *mp; + struct statfs *sbp; + struct thread *td; +{ + int error; + + XVFS_STATVFS(MNTTOVFS(mp), sbp, NULL, error); + if (error) + return error; + + /* Fix up the values XFS statvfs calls does not know about. */ + sbp->f_iosize = sbp->f_bsize; + + return (error); +} + +static int +_xfs_sync(mp, waitfor, td) + struct mount *mp; + int waitfor; + struct thread *td; +{ + int error; + int flags = SYNC_FSDATA|SYNC_ATTR|SYNC_REFCACHE; + + if (waitfor == MNT_WAIT) + flags |= SYNC_WAIT; + else if (waitfor == MNT_LAZY) + flags |= SYNC_BDFLUSH; + XVFS_SYNC(MNTTOVFS(mp), flags, td->td_ucred, error); + return (error); +} + +static int +_xfs_vget(mp, ino, flags, vpp) + struct mount *mp; + ino_t ino; + int flags; + struct vnode **vpp; +{ + xfs_vnode_t *vp; + int error; + + printf("XVFS_GET_VNODE(MNTTOVFS(mp), &vp, ino, error);\n"); + error = ENOSYS; + if (error == 0) + *vpp = vp->v_vnode; + return (error); +} + +static int +_xfs_fhtovp(mp, fidp, vpp) + struct mount *mp; + struct fid *fidp; + struct vnode **vpp; +{ + printf("xfs_fhtovp\n"); + return ENOSYS; +} + +static int +_xfs_checkexp(mp, nam, extflagsp, credanonp) + struct mount *mp; + struct sockaddr *nam; + int *extflagsp; + struct ucred **credanonp; +{ + printf("xfs_checkexp\n"); + return 
ENOSYS; +} + +static int +_xfs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + printf("xfs_vptofh"); + return ENOSYS; +} + +static int +_xfs_extattrctl(struct mount *mp, int cm, + struct vnode *filename_v, + int attrnamespace, const char *attrname, + struct thread *td) +{ + printf("xfs_extattrctl\n"); + return ENOSYS; +} + +int +_xfs_init(vfsp) + struct vfsconf *vfsp; +{ + int error; + + error = init_xfs_fs(); + + return (error); +} + +int +_xfs_uninit(vfsp) + struct vfsconf *vfsp; +{ + exit_xfs_fs(); + return 0; +} + +static struct vfsops xfs_fsops = { + .vfs_mount = _xfs_mount, + .vfs_unmount = _xfs_unmount, + .vfs_root = _xfs_root, + .vfs_quotactl = _xfs_quotactl, + .vfs_statfs = _xfs_statfs, + .vfs_sync = _xfs_sync, + .vfs_vget = _xfs_vget, + .vfs_fhtovp = _xfs_fhtovp, + .vfs_checkexp = _xfs_checkexp, + .vfs_vptofh = _xfs_vptofh, + .vfs_init = _xfs_init, + .vfs_uninit = _xfs_uninit, + .vfs_extattrctl = _xfs_extattrctl, +}; + +/* XXX: Read-only for now */ +VFS_SET(xfs_fsops, xfs, VFCF_READONLY); + +/* + * Copy GEOM VFS functions here to provide a conveniet place to + * track all XFS-related IO without being distracted by other + * filesystems which happen to be mounted on the machine at the + * same time. 
+ */ + +static void +xfs_geom_biodone(struct bio *bip) +{ + struct buf *bp; + + if (bip->bio_error) { + printf("g_vfs_done():"); + g_print_bio(bip); + printf("error = %d\n", bip->bio_error); + } + bp = bip->bio_caller2; + bp->b_error = bip->bio_error; + bp->b_ioflags = bip->bio_flags; + if (bip->bio_error) + bp->b_ioflags |= BIO_ERROR; + bp->b_resid = bp->b_bcount - bip->bio_completed; + g_destroy_bio(bip); + mtx_lock(&Giant); + bufdone(bp); + mtx_unlock(&Giant); +} + +static void +xfs_geom_strategy(struct bufobj *bo, struct buf *bp) +{ + struct g_consumer *cp; + struct bio *bip; + + cp = bo->bo_private; + G_VALID_CONSUMER(cp); + + bip = g_alloc_bio(); + bip->bio_cmd = bp->b_iocmd; + bip->bio_offset = bp->b_iooffset; + bip->bio_data = bp->b_data; + bip->bio_done = xfs_geom_biodone; + bip->bio_caller2 = bp; + bip->bio_length = bp->b_bcount; + g_io_request(bip, cp); +} + +static int +xfs_geom_bufwrite(struct buf *bp) +{ + return bufwrite(bp); +} + +struct buf_ops xfs_ops = { + .bop_name = "XFS", + .bop_write = xfs_geom_bufwrite, + .bop_strategy = xfs_geom_strategy, + .bop_sync = bufsync, +}; diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.h b/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.h new file mode 100644 index 000000000000..c8a766d5155f --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.h @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2001 Alexander Kabaev + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _XFS_XFS_H_ +#define _XFS_XFS_H_ + +#define XFSFS_VMAJOR 0 +#define XFS_VMINOR 1 +#define XFS_VERSION ((XFS_VMAJOR << 16) | XFS_VMINOR) +#define XFS_NAME "xfs" + +#ifdef _KERNEL + +struct xfsmount { + struct xfs_mount_args m_args; /* Mount parameters */ + struct mount * m_mp; /* Back pointer */ + xfs_vfs_t m_vfs; /* SHOULD BE FIRST */ +}; + +#define XFSTOMNT(xmp) ((xmp)->m_mp) +#define XFSTOVFS(xmp) (&(xmp)->m_vfs) + +#define MNTTOXFS(mp) ((struct xfsmount *)((mp)->mnt_data)) +#define MNTTOVFS(mp) XFSTOVFS(MNTTOXFS(mp)) + +#define VFSTOMNT(vfsp) (vfsp)->vfs_mp +#define VFSTOXFS(vfsp) MNTTOXFS(VFSTOMNT(vfsp)) + +struct xfsmount *xfsmount_allocate(struct mount *mp); +void xfsmount_deallocate(struct xfsmount *xmp); + +#endif /* _KERNEL */ + +#endif /* _XFS_XFS_H*/ + diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_node.h b/sys/gnu/fs/xfs/FreeBSD/xfs_node.h new file mode 100644 index 000000000000..d8b613c3c50a --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_node.h @@ -0,0 +1,16 @@ +#ifndef __XFS_NODE_H__ +#define __XFS_NODE_H__ + +/* + * Save one allocation on FreeBSD and always allocate both inode and + * xfs_vnode struct as a single memory block. 
+ */ +struct xfs_node +{ + struct xfs_inode n_inode; + struct xfs_vnode n_vnode; +}; + +#define XFS_CAST_IP2VP(ip) (&((struct xfs_node *)(ip))->n_vnode) + +#endif /* __XFS_NODE_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_stats.c b/sys/gnu/fs/xfs/FreeBSD/xfs_stats.c new file mode 100644 index 000000000000..04582ee77d1e --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_stats.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +struct xfsstats xfsstats; + +STATIC int +xfs_read_xfsstats( + char *buffer, + char **start, + off_t offset, + int count, + int *eof, + void *data) +{ + int i, j, len; + static struct xstats_entry { + char *desc; + int endpoint; + } xstats[] = { + { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, + { "abt", XFSSTAT_END_ALLOC_BTREE }, + { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, + { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, + { "dir", XFSSTAT_END_DIRECTORY_OPS }, + { "trans", XFSSTAT_END_TRANSACTIONS }, + { "ig", XFSSTAT_END_INODE_OPS }, + { "log", XFSSTAT_END_LOG_OPS }, + { "push_ail", XFSSTAT_END_TAIL_PUSHING }, + { "xstrat", XFSSTAT_END_WRITE_CONVERT }, + { "rw", XFSSTAT_END_READ_WRITE_OPS }, + { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, + { "icluster", XFSSTAT_END_INODE_CLUSTER }, + { "vnodes", XFSSTAT_END_VNODE_OPS }, + }; + + for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) { + len += sprintf(buffer + len, xstats[i].desc); + /* inner loop does each group */ + while (j < xstats[i].endpoint) { + len += sprintf(buffer + len, " %u", + *(((__u32*)&xfsstats) + j)); + j++; + } + buffer[len++] = '\n'; + } + /* extra precision counters */ + len += sprintf(buffer + len, "xpc %ju %ju %ju\n", + (uintmax_t)xfsstats.xs_xstrat_bytes, + (uintmax_t)xfsstats.xs_write_bytes, + (uintmax_t)xfsstats.xs_read_bytes); + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} + +void +xfs_init_procfs(void) +{ + if (&xfs_read_xfsstats != NULL); +} + +void +xfs_cleanup_procfs(void) +{ +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_stats.h b/sys/gnu/fs/xfs/FreeBSD/xfs_stats.h new 
file mode 100644 index 000000000000..04ddc95d46f4 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_stats.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if !defined(XFS_STATS_OFF)
+
+/*
+ * XFS global statistics
+ *
+ * Each XFSSTAT_END_<group> macro is the running total of 32-bit counters
+ * up to and including the group of fields declared right after it, so the
+ * increments below must be kept in step with the number of fields.
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC	4
+	__uint32_t		xs_allocx;
+	__uint32_t		xs_allocb;
+	__uint32_t		xs_freex;
+	__uint32_t		xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
+	__uint32_t		xs_abt_lookup;
+	__uint32_t		xs_abt_compare;
+	__uint32_t		xs_abt_insrec;
+	__uint32_t		xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
+	__uint32_t		xs_blk_mapr;
+	__uint32_t		xs_blk_mapw;
+	__uint32_t		xs_blk_unmap;
+	__uint32_t		xs_add_exlist;
+	__uint32_t		xs_del_exlist;
+	__uint32_t		xs_look_exlist;
+	__uint32_t		xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
+	__uint32_t		xs_bmbt_lookup;
+	__uint32_t		xs_bmbt_compare;
+	__uint32_t		xs_bmbt_insrec;
+	__uint32_t		xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
+	__uint32_t		xs_dir_lookup;
+	__uint32_t		xs_dir_create;
+	__uint32_t		xs_dir_remove;
+	__uint32_t		xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
+	__uint32_t		xs_trans_sync;
+	__uint32_t		xs_trans_async;
+	__uint32_t		xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
+	__uint32_t		xs_ig_attempts;
+	__uint32_t		xs_ig_found;
+	__uint32_t		xs_ig_frecycle;
+	__uint32_t		xs_ig_missed;
+	__uint32_t		xs_ig_dup;
+	__uint32_t		xs_ig_reclaims;
+	__uint32_t		xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
+	__uint32_t		xs_log_writes;
+	__uint32_t		xs_log_blocks;
+	__uint32_t		xs_log_noiclogs;
+	__uint32_t		xs_log_force;
+	__uint32_t		xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
+	__uint32_t		xs_try_logspace;
+	__uint32_t		xs_sleep_logspace;
+	__uint32_t		xs_push_ail;
+	__uint32_t		xs_push_ail_success;
+	__uint32_t		xs_push_ail_pushbuf;
+	__uint32_t		xs_push_ail_pinned;
+	__uint32_t		xs_push_ail_locked;
+	__uint32_t		xs_push_ail_flushing;
+	__uint32_t		xs_push_ail_restarts;
+	__uint32_t		xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
+	__uint32_t		xs_xstrat_quick;
+	__uint32_t		xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
+	__uint32_t		xs_write_calls;
+	__uint32_t		xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
+	__uint32_t		xs_attr_get;
+	__uint32_t		xs_attr_set;
+	__uint32_t		xs_attr_remove;
+	__uint32_t		xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
+	__uint32_t		xs_iflush_count;
+	__uint32_t		xs_icluster_flushcnt;
+	__uint32_t		xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
+	__uint32_t		vn_active;	/* # vnodes not on free lists */
+	__uint32_t		vn_alloc;	/* # times vn_alloc called */
+	__uint32_t		vn_get;		/* # times vn_get called */
+	__uint32_t		vn_hold;	/* # times vn_hold called */
+	__uint32_t		vn_rele;	/* # times vn_rele called */
+	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
+	__uint32_t		vn_remove;	/* # times vn_remove called */
+	__uint32_t		vn_free;	/* # times vn_free called */
+#define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
+	__uint32_t		pb_get;
+	__uint32_t		pb_create;
+	__uint32_t		pb_get_locked;
+	__uint32_t		pb_get_locked_waited;
+	__uint32_t		pb_busy_locked;
+	__uint32_t		pb_miss_locked;
+	__uint32_t		pb_page_retries;
+	__uint32_t		pb_page_found;
+	__uint32_t		pb_get_read;
+/* Extra precision counters */
+	__uint64_t		xs_xstrat_bytes;
+	__uint64_t		xs_write_bytes;
+	__uint64_t		xs_read_bytes;
+};
+
+extern struct xfsstats xfsstats;
+
+/* Plain (non-atomic) read-modify-write updates of the global counters. */
+# define XFS_STATS_INC(count)		( xfsstats.count++ )
+# define XFS_STATS_DEC(count)		( xfsstats.count-- )
+# define XFS_STATS_ADD(count, inc)	( xfsstats.count += (inc) )
+
+extern void xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else	/* XFS_STATS_OFF */
+
+/* Statistics compiled out: the update macros expand to nothing. */
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+/* No trailing ';' here: an empty declaration at file scope is not ISO C. */
+static __inline void xfs_init_procfs(void) { }
+static __inline void xfs_cleanup_procfs(void) { }
+
+#endif	/* !XFS_STATS_OFF */
+
+#endif /* __XFS_STATS_H__ */
+
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_super.c b/sys/gnu/fs/xfs/FreeBSD/xfs_super.c
new file mode 100644
index 000000000000..7f23f42b9d72
--- /dev/null
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_super.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_version.h"
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+extern struct vop_vector xfs_fifoops;
+extern struct buf_ops xfs_ops;
+
+/*
+ * Largest supported file offset.  The block shift is ignored; the result
+ * is always OFF_MAX.
+ */
+__uint64_t
+xfs_max_file_offset(
+	unsigned int		blockshift)
+{
+
+	return (OFF_MAX);
+}
+
+/*
+ * Attach the inode behavior to the XFS vnode (first call only) and, for a
+ * vnode whose type is still VNON, derive the type from the on-disk mode,
+ * install the FIFO ops vector if required and optionally unlock it.
+ */
+void
+xfs_initialize_vnode(
+	bhv_desc_t		*bdp,
+	xfs_vnode_t		*vp,
+	bhv_desc_t		*inode_bhv,
+	int			unlock)
+{
+	xfs_inode_t		*ip = XFS_BHVTOI(inode_bhv);
+
+	if (!inode_bhv->bd_vobj) {
+		vp->v_vfsp = bhvtovfs(bdp);
+		bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
+		bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
+	}
+
+	/*
+	 * XXX: Use VNON as an indication of freshly allocated vnode
+	 * which need to be initialized and unlocked.
+	 * This is _not_ like the same place in Linux version of
+	 * routine.
+	 */
+	if (vp->v_type != VNON)
+		return;
+
+	vp->v_type = IFTOVT(ip->i_d.di_mode);
+	vp->v_vnode->v_type = vp->v_type;
+
+	if (vp->v_type == VFIFO)
+		vp->v_vnode->v_op = &xfs_fifoops;
+
+	ASSERT_VOP_LOCKED(vp->v_vnode, "xfs_initialize_vnode");
+
+	/* For new inodes we need to set the ops vectors,
+	 * and unlock the inode.
+	 */
+	if (unlock)
+		VOP_UNLOCK(vp->v_vnode, 0, curthread);
+}
+
+/* Not implemented: always returns NULL. */
+struct vnode *
+xfs_get_inode(
+	bhv_desc_t	*bdp,
+	xfs_ino_t	ino,
+	int		flags)
+{
+	return NULL;
+}
+
+/* Not implemented: only logs that it was called. */
+void
+xfs_flush_inode(
+	xfs_inode_t	*ip)
+{
+	printf("xfs_flush_inode NI\n");
+}
+
+/* Logs a "not implemented" notice, then forces the log synchronously. */
+void
+xfs_flush_device(
+	xfs_inode_t	*ip)
+{
+	printf("xfs_flush_device NI\n");
+	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
+
+/*
+ * Look up the device node `name', check that the caller may access it and
+ * open it through GEOM.  On success the referenced, unlocked device vnode
+ * is stored in *bdevp and 0 is returned; on failure an errno value is
+ * returned and no vnode reference is kept.
+ */
+/*ARGSUSED*/
+int
+xfs_blkdev_get(
+	xfs_mount_t		*mp,
+	const char		*name,
+	struct vnode		**bdevp)
+{
+	struct nameidata	nd;
+	struct nameidata	*ndp = &nd;
+	int			error, ronly;
+	struct thread		*td;
+	struct vnode		*devvp;
+	struct g_consumer	*cp;
+	struct g_provider	*pp;
+	mode_t			accessmode;
+
+	td = curthread;
+
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, name, td);
+	if ((error = namei(ndp)) != 0)
+		return (error);
+	NDFREE(ndp, NDF_ONLY_PNBUF);
+	devvp = ndp->ni_vp;
+
+	if (!vn_isdisk(devvp, &error)) {
+		vrele(devvp);
+		return (error);
+	}
+
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
+
+	ronly = ((XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) != 0);
+	if (suser(td)) {
+		accessmode = VREAD;
+		if (!ronly)
+			accessmode |= VWRITE;
+		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
+			vput(devvp);
+			return (error);
+		}
+	}
+
+	DROP_GIANT();
+	g_topology_lock();
+
+	/*
+	 * XXX: Do not allow more than one consumer to open a device
+	 *      associated with a particular GEOM provider.
+	 *      This disables multiple read-only mounts of a device,
+	 *      but it gets rid of panics in bremfree() when you try to
+	 *      mount the same device more than once.
+	 *      During mounting, XFS does a bread() of the superblock, but does
+	 *      not brelse() it.  A subsequent mount of the same device
+	 *      will try to bread() the superblock, resulting in a panic in
+	 *      bremfree(), "buffer not on queue".
+	 */
+	pp = g_dev_getprovider(devvp->v_rdev);
+	if ((pp != NULL) && ((pp->acr | pp->acw | pp->ace ) != 0))
+		error = EPERM;
+	else
+		error = g_vfs_open(devvp, &cp, "xfs", ronly ? 0 : 1);
+
+	g_topology_unlock();
+	PICKUP_GIANT();
+
+	if (error) {
+		/*
+		 * The vnode is still locked here, which is what vput()
+		 * expects: it unlocks and then drops the reference.
+		 * Unlocking first and then calling vput() would release
+		 * the lock twice.
+		 */
+		vput(devvp);
+		return (error);
+	}
+	VOP_UNLOCK(devvp, 0, td);
+
+	devvp->v_bufobj.bo_private = cp;
+	devvp->v_bufobj.bo_ops = &xfs_ops;
+
+	*bdevp = devvp;
+	return (0);
+}
+
+/*
+ * Undo xfs_blkdev_get(): flush/invalidate the device buffers, close the
+ * GEOM consumer stored in the bufobj and drop the vnode reference.
+ * A NULL devvp is ignored.
+ */
+void
+xfs_blkdev_put(
+	struct vnode	*devvp)
+{
+	struct g_consumer	*cp;
+
+	if (devvp == NULL)
+		return;
+
+	vinvalbuf(devvp, V_SAVE, curthread, 0, 0);
+
+	cp = devvp->v_bufobj.bo_private;
+	DROP_GIANT();
+	g_topology_lock();
+	g_wither_geom_close(cp->geom, ENXIO);
+	g_topology_unlock();
+	PICKUP_GIANT();
+
+	vrele(devvp);
+}
+
+/* Not implemented: only logs that it was called. */
+void
+xfs_flush_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	printf("xfs_flush_buftarg NI %p\n",btp);
+}
+
+/* Flush (currently a stub) and free a buffer target. */
+void
+xfs_free_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	xfs_flush_buftarg(btp);
+	kmem_free(btp, sizeof(*btp));
+}
+
+/* Returns non-zero when the GEOM consumer holds no write access. */
+int
+xfs_readonly_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	struct g_consumer	*cp;
+
+	KASSERT(btp->specvp->v_bufobj.bo_ops == &xfs_ops,
+	   ("Bogus xfs_buftarg_t pointer"));
+	cp = btp->specvp->v_bufobj.bo_private;
+
+	return (cp->acw == 0);
+}
+
+/* Not implemented: only logs that it was called. */
+void
+xfs_relse_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	printf("xfs_relse_buftarg NI %p\n",btp);
+}
+
+/* Sector size of the provider backing the buffer target. */
+unsigned int
+xfs_getsize_buftarg(
+	xfs_buftarg_t		*btp)
+{
+	struct g_consumer	*cp;
+	cp = btp->specvp->v_bufobj.bo_private;
+	return (cp->provider->sectorsize);
+}
+
+/* Not implemented: only logs that it was called. */
+void
+xfs_setsize_buftarg(
+	xfs_buftarg_t		*btp,
+	unsigned int		blocksize,
+	unsigned int		sectorsize)
+{
+	printf("xfs_setsize_buftarg NI %p\n",btp);
+}
+
+/* Allocate a zeroed buffer target bound to the device vnode `bdev'. */
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+	struct vnode	*bdev)
+{
+	xfs_buftarg_t		*btp;
+
+	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+	btp->dev    = bdev->v_rdev;
+	btp->specvp = bdev;
+
+	return btp;
+}
+
+/* Print the build banner and initialise the XFS subsystems.  Returns 0. */
+int
+init_xfs_fs( void )
+{
+	static char		message[] =
+		XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
+
+	/* Never hand a data buffer to printf() as the format string. */
+	printf("%s", message);
+
+	vn_init();
+	xfs_init();
+	uuid_init();
+	vfs_initdmapi();
+	vfs_initquota();
+
+	return 0;
+}
+
+/* Tear down what init_xfs_fs() set up. */
+void
+exit_xfs_fs(void)
+{
+	xfs_cleanup();
+	vfs_exitquota();
+	vfs_exitdmapi();
+	uuid_cleanup();
+}
+
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_super.h b/sys/gnu/fs/xfs/FreeBSD/xfs_super.h
new file mode 100644
index 000000000000..2665806ed943
--- /dev/null
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_super.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+/*
+ * Optional-module hooks.  The insert macros use their own argument rather
+ * than relying on a variable called `vfsp' existing at the call site.
+ */
+#ifdef CONFIG_XFS_DMAPI
+# define vfs_insertdmapi(vfs)	vfs_insertops((vfs), &xfs_dmops)
+# define vfs_initdmapi()	dmapi_init()
+# define vfs_exitdmapi()	dmapi_uninit()
+#else
+# define vfs_insertdmapi(vfs)	do { } while (0)
+# define vfs_initdmapi()	do { } while (0)
+# define vfs_exitdmapi()	do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_QUOTA
+# define vfs_insertquota(vfs)	vfs_insertops((vfs), &xfs_qmops)
+# define vfs_initquota()	xfs_qm_init()
+# define vfs_exitquota()	xfs_qm_exit()
+#else
+# define vfs_insertquota(vfs)	do { } while (0)
+# define vfs_initquota()	do { } while (0)
+# define vfs_exitquota()	do { } while (0)
+#endif
+
+/*
+ * The XFS_*_STRING macros below are concatenated into XFS_BUILD_OPTIONS;
+ * a disabled feature contributes an empty expansion.
+ */
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING		"ACLs, "
+# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)	do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_SECURITY
+# define XFS_SECURITY_STRING	"security attributes, "
+# define ENOSECURITY		0
+#else
+# define XFS_SECURITY_STRING
+# define ENOSECURITY		EOPNOTSUPP
+#endif
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING	"realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+#  define XFS_BIGFS_STRING	"large block/inode numbers, "
+# else
+#  define XFS_BIGFS_STRING	"large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef CONFIG_XFS_TRACE
+# define XFS_TRACE_STRING	"tracing, "
+#else
+# define XFS_TRACE_STRING
+#endif
+
+#ifdef XFSDEBUG
+# define XFS_DBG_STRING		"debug"
+#else
+# define XFS_DBG_STRING		"no debug"
+#endif
+
+#define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
+				XFS_SECURITY_STRING \
+				XFS_REALTIME_STRING \
+				XFS_BIGFS_STRING \
+				XFS_TRACE_STRING \
+				XFS_DBG_STRING /* DBG must be last */
+
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_initialize_vnode(bhv_desc_t *, xfs_vnode_t *, bhv_desc_t *, int);
+
+extern struct vnode * xfs_get_inode( bhv_desc_t *, xfs_ino_t, int);
+extern void xfs_flush_inode(struct xfs_inode *);
+extern void xfs_flush_device(struct xfs_inode *);
+
+/* The device is a vnode here, matching the definitions in xfs_super.c. */
+extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
+				struct vnode **);
+extern void xfs_blkdev_put(struct vnode *);
+
+extern struct xfs_buftarg *xfs_alloc_buftarg(struct vnode *);
+extern void xfs_relse_buftarg(struct xfs_buftarg *);
+extern void xfs_free_buftarg(struct xfs_buftarg *);
+extern void xfs_flush_buftarg(struct xfs_buftarg *);
+extern int xfs_readonly_buftarg(struct xfs_buftarg *);
+extern void xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int, unsigned int);
+extern unsigned int xfs_getsize_buftarg(struct xfs_buftarg *);
+
+extern int init_xfs_fs(void);
+extern void exit_xfs_fs(void);
+
+#endif	/* __XFS_SUPER_H__ */
+
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_sysctl.c b/sys/gnu/fs/xfs/FreeBSD/xfs_sysctl.c
new file mode 100644
index 000000000000..9ba2d45b33c1
--- /dev/null
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_sysctl.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+/* Placeholder: registers nothing. */
+void
+xfs_sysctl_register(void)
+{
+}
+
+/* Placeholder: unregisters nothing. */
+void
+xfs_sysctl_unregister(void)
+{
+}
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_sysctl.h b/sys/gnu/fs/xfs/FreeBSD/xfs_sysctl.h
new file mode 100644
index 000000000000..b4b58b487c45
--- /dev/null
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_sysctl.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+/*
+ * Tunable xfs parameters
+ */
+
+/*
+ * One tunable: a value stored alongside a min and a max.
+ * NOTE(review): presumably val is meant to stay within [min, max]; nothing
+ * in this header enforces that -- confirm where the bounds are applied.
+ */
+typedef struct xfs_sysctl_val {
+	int min;
+	int val;
+	int max;
+} xfs_sysctl_val_t;
+
+/* The full set of tunables; a single instance, xfs_params, is declared below. */
+typedef struct xfs_param {
+	xfs_sysctl_val_t refcache_size;	/* Size of NFS reference cache.      */
+	xfs_sysctl_val_t refcache_purge;/* # of entries to purge each time.  */
+	xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
+	xfs_sysctl_val_t sgid_inherit;	/* Inherit S_ISGID bit if process' GID
+					 * is not a member of the parent dir
+					 * GID */
+	xfs_sysctl_val_t symlink_mode;	/* Link creat mode affected by umask */
+	xfs_sysctl_val_t panic_mask;	/* bitmask to cause panic on errors. */
+	xfs_sysctl_val_t error_level;	/* Degree of reporting for problems  */
+	xfs_sysctl_val_t sync_interval;	/* time between sync calls           */
+	xfs_sysctl_val_t stats_clear;	/* Reset all XFS statistics to zero. */
+	xfs_sysctl_val_t probe_dmapi;	/* probe for DMAPI module on mount. */
+	xfs_sysctl_val_t probe_ioops;	/* probe for an IO module on mount. */
+	xfs_sysctl_val_t probe_quota;	/* probe for quota module on mount. */
+	xfs_sysctl_val_t inherit_sync;	/* Inherit the "sync" inode flag. */
+	xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+	xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+	xfs_sysctl_val_t flush_interval;/* interval between runs of the
+					 * delwri flush daemon.  */
+	xfs_sysctl_val_t age_buffer;	/* time for buffer to age before
+					 * we flush it.  */
+	xfs_sysctl_val_t io_bypass;	/* toggle for directio io bypass */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered. These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0 No error reports
+ * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any
+ *   additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+/*
+ * Numeric identifiers, one per tunable.  The numbering does not follow the
+ * member order of xfs_param_t (XFS_STATS_CLEAR is 12, after the probe
+ * entries), so do not use these values as indices into that structure.
+ */
+enum {
+	XFS_REFCACHE_SIZE = 1,
+	XFS_REFCACHE_PURGE = 2,
+	XFS_RESTRICT_CHOWN = 3,
+	XFS_SGID_INHERIT = 4,
+	XFS_SYMLINK_MODE = 5,
+	XFS_PANIC_MASK = 6,
+	XFS_ERRLEVEL = 7,
+	XFS_SYNC_INTERVAL = 8,
+	XFS_PROBE_DMAPI = 9,
+	XFS_PROBE_IOOPS = 10,
+	XFS_PROBE_QUOTA = 11,
+	XFS_STATS_CLEAR = 12,
+	XFS_INHERIT_SYNC = 13,
+	XFS_INHERIT_NODUMP = 14,
+	XFS_INHERIT_NOATIME = 15,
+	XFS_FLUSH_INTERVAL = 16,
+	XFS_AGE_BUFFER = 17,
+	XFS_IO_BYPASS = 18,
+};
+
+extern xfs_param_t xfs_params;
+
+/* Without CONFIG_SYSCTL the register/unregister calls compile to no-ops. */
+#ifdef CONFIG_SYSCTL
+extern void xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register()		do { } while (0)
+# define xfs_sysctl_unregister()	do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
+
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_version.h b/sys/gnu/fs/xfs/FreeBSD/xfs_version.h
new file mode 100644
index 000000000000..96f96394417e
--- /dev/null
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_version.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Dummy file that can contain a timestamp to put into the + * XFS init string, to help users keep track of what they're + * running + */ + +#ifndef __XFS_VERSION_H__ +#define __XFS_VERSION_H__ + +#define XFS_VERSION_STRING "SGI XFS" + +#endif /* __XFS_VERSION_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_vfs.c b/sys/gnu/fs/xfs/FreeBSD/xfs_vfs.c new file mode 100644 index 000000000000..a621e200ba95 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_vfs.c @@ -0,0 +1,372 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_macros.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_imap.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+
+#include "xfs_mountops.h"
+
+/*
+ * Behavior-chain dispatchers.
+ *
+ * Each xvfs_<op>() below starts at the descriptor `bdp', advances along
+ * the chain until it reaches a descriptor whose ops vector provides <op>,
+ * and calls that implementation with the descriptor it was found on.
+ * All of them except xvfs_get_inode() advance with BHV_NEXT() and so
+ * require that some descriptor on the chain implements the operation.
+ */
+
+int
+xvfs_mount(
+	struct bhv_desc		*bdp,
+	struct xfs_mount_args	*args,
+	struct cred		*cr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_mount)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_mount)(next, args, cr));
+}
+
+int
+xvfs_parseargs(
+	struct bhv_desc		*bdp,
+	char			*s,
+	struct xfs_mount_args	*args,
+	int			f)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_parseargs)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_parseargs)(next, s, args, f));
+}
+
+int
+xvfs_showargs(
+	struct bhv_desc		*bdp,
+	struct sbuf		*m)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_showargs)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_showargs)(next, m));
+}
+
+int
+xvfs_unmount(
+	struct bhv_desc		*bdp,
+	int			fl,
+	struct cred		*cr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_unmount)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_unmount)(next, fl, cr));
+}
+
+int
+xvfs_mntupdate(
+	struct bhv_desc		*bdp,
+	int			*fl,
+	struct xfs_mount_args	*args)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_mntupdate)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_mntupdate)(next, fl, args));
+}
+
+int
+xvfs_root(
+	struct bhv_desc		*bdp,
+	struct xfs_vnode	**vpp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_root)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_root)(next, vpp));
+}
+
+int
+xvfs_statvfs(
+	struct bhv_desc		*bdp,
+	struct statfs		*sp,
+	struct xfs_vnode	*vp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_statvfs)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_statvfs)(next, sp, vp));
+}
+
+int
+xvfs_sync(
+	struct bhv_desc		*bdp,
+	int			fl,
+	struct cred		*cr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_sync)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_sync)(next, fl, cr));
+}
+
+int
+xvfs_vget(
+	struct bhv_desc		*bdp,
+	struct xfs_vnode	**vpp,
+	struct fid		*fidp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_vget)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_vget)(next, vpp, fidp));
+}
+
+int
+xvfs_dmapiops(
+	struct bhv_desc		*bdp,
+	caddr_t			addr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_dmapiops)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_dmapiops)(next, addr));
+}
+
+int
+xvfs_quotactl(
+	struct bhv_desc		*bdp,
+	int			cmd,
+	int			id,
+	caddr_t			addr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_quotactl)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->xvfs_quotactl)(next, cmd, id, addr));
+}
+
+/*
+ * Unlike its siblings this dispatcher advances with BHV_NEXTNULL(), so the
+ * walk can run off the end of the chain.  Return NULL in that case (and
+ * for a NULL bdp) instead of dereferencing a NULL descriptor.
+ * NOTE(review): assumes BHV_NEXTNULL() yields NULL after the last
+ * descriptor -- its definition is not in this file; confirm.
+ */
+struct inode *
+xvfs_get_inode(
+	struct bhv_desc		*bdp,
+	xfs_ino_t		ino,
+	int			fl)
+{
+	struct bhv_desc		*next = bdp;
+
+	while (next != NULL && ! (bhvtovfsops(next))->xvfs_get_inode)
+		next = BHV_NEXTNULL(next);
+	if (next == NULL)
+		return (NULL);
+	return ((*bhvtovfsops(next)->xvfs_get_inode)(next, ino, fl));
+}
+
+void
+xvfs_init_vnode(
+	struct bhv_desc		*bdp,
+	struct xfs_vnode	*vp,
+	struct bhv_desc		*bp,
+	int			unlock)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_init_vnode)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->xvfs_init_vnode)(next, vp, bp, unlock));
+}
+
+void
+xvfs_force_shutdown(
+	struct bhv_desc		*bdp,
+	int			fl,
+	char			*file,
+	int			line)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->xvfs_force_shutdown)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->xvfs_force_shutdown)(next, fl, file, line));
+}
+
+/*
+ * Allocate a zeroed xfsmount, initialise the behavior head of the xfs_vfs
+ * obtained from it via XFSTOVFS() and cross-link it with the FreeBSD mount.
+ */
+xfs_vfs_t *
+vfs_allocate(struct mount *mp)
+{
+	struct xfs_vfs		*vfsp;
+	struct xfsmount		*xmp;
+
+	xmp  = kmem_zalloc(sizeof(*xmp), KM_SLEEP);
+	vfsp = XFSTOVFS(xmp);
+
+	bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
+
+	xmp->m_mp = mp;
+	mp->mnt_data = (qaddr_t)xmp;
+	vfsp->vfs_mp = mp;
+
+	return vfsp;
+}
+
+/* Destroy the behavior head and free the xfsmount that contains vfsp. */
+void
+vfs_deallocate(
+	struct xfs_vfs		*vfsp)
+{
+	struct xfsmount		*xmp;
+
+	bhv_head_destroy(VFS_BHVHEAD(vfsp));
+
+	xmp = VFSTOXFS(vfsp);
+	kmem_free(xmp, sizeof(*xmp));
+}
+
+/*
+ * Allocate and initialize a new XFS mount structure
+ */
+struct xfsmount *
+xfsmount_allocate(struct mount *mp)
+{
+	xfs_vfs_t	*vfsp;
+
+	vfsp = vfs_allocate(mp);
+
+	ASSERT(vfsp);
+
+	if (mp->mnt_flag & MNT_RDONLY)
+		vfsp->vfs_flag |= VFS_RDONLY;
+
+	bhv_insert_all_vfsops(vfsp);
+	return (VFSTOXFS(vfsp));
+}
+
+/* Remove every behavior (including the base one) and free the mount. */
+void
+xfsmount_deallocate(struct xfsmount *xmp)
+{
+	xfs_vfs_t	*vfsp;
+
+	vfsp = XFSTOVFS(xmp);
+	bhv_remove_all_vfsops(vfsp, 1);
+	vfs_deallocate(vfsp);
+}
+
+
+/* Allocate a behavior descriptor for `vfsops' and insert it on the chain. */
+void
+vfs_insertops(
+	struct xfs_vfs		*vfsp,
+	struct bhv_vfsops	*vfsops)
+{
+	struct bhv_desc		*bdp;
+
+	bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
+	bhv_desc_init(bdp, NULL, vfsp, vfsops);
+	bhv_insert(&vfsp->vfs_bh, bdp);
+}
+
+/* Initialise a caller-provided descriptor and insert it as the initial one. */
+void
+vfs_insertbhv(
+	struct xfs_vfs		*vfsp,
+	struct bhv_desc		*bdp,
+	struct xvfsops		*vfsops,
+	void			*mount)
+{
+	bhv_desc_init(bdp, mount, vfsp, vfsops);
+	bhv_insert_initial(&vfsp->vfs_bh, bdp);
+}
+
+/* Remove and free the descriptor at chain position `pos', if there is one. */
+void
+bhv_remove_vfsops(
+	struct xfs_vfs		*vfsp,
+	int			pos)
+{
+	struct bhv_desc		*bhv;
+
+	bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
+	if (bhv) {
+		bhv_remove(&vfsp->vfs_bh, bhv);
+		kmem_free(bhv, sizeof(*bhv));
+	}
+}
+
+/*
+ * Remove the QM, DM and IO behaviors; when `freebase' is non-zero also
+ * detach the base XFS behavior and free its xfs_mount.
+ */
+void
+bhv_remove_all_vfsops(
+	struct xfs_vfs		*vfsp,
+	int			freebase)
+{
+	struct xfs_mount	*mp;
+
+	bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
+	bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
+	bhv_remove_vfsops(vfsp, VFS_POSITION_IO);
+	if (!freebase)
+		return;
+	mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
+	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
+	xfs_mount_free(mp, 0);
+}
+
+/* Create the xfs_mount, insert the base behavior, then DMAPI and quota. */
+void
+bhv_insert_all_vfsops(
+	struct xfs_vfs		*vfsp)
+{
+	struct xfs_mount	*mp;
+
+	mp = xfs_mount_init();
+	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+	vfs_insertdmapi(vfsp);
+	vfs_insertquota(vfsp);
+}
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_vfs.h b/sys/gnu/fs/xfs/FreeBSD/xfs_vfs.h
new file mode 100644
index 000000000000..c16ddcd0310c
--- /dev/null
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_vfs.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_VFS_H__ +#define __XFS_VFS_H__ + +#include <sys/mount.h> +#include "xfs_fs.h" + +struct fid; +struct cred; +struct xfs_vnode; +struct statfs; +struct sbuf; +struct xfs_mount_args; +struct mount; + +typedef struct statfs xfs_statfs_t; + +typedef struct xfs_vfs { + u_int vfs_flag; /* flags */ + xfs_fsid_t vfs_fsid; /* file system ID */ + xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */ + bhv_head_t vfs_bh; /* head of vfs behavior chain */ + struct mount *vfs_mp; /* FreeBSD mount struct */ +} xfs_vfs_t; + +#define MNTTOXVFS(mp) ((struct xfs_vfs*)(mp)->mnt_data) +#define XVFSTOMNT(vfs) ((vfs)->vfs_mp) + +#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ + +#define bhvtovfs(bdp) ( (struct xfs_vfs *)BHV_VOBJ(bdp) ) +#define bhvtovfsops(bdp) ( (struct xvfsops *)BHV_OPS(bdp) ) +#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh ) +#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) ) + +#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ +#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */ +#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. 
num */ + +typedef enum { + VFS_BHV_UNKNOWN, /* not specified */ + VFS_BHV_XFS, /* xfs */ + VFS_BHV_DM, /* data migration */ + VFS_BHV_QM, /* quota manager */ + VFS_BHV_IO, /* IO path */ + VFS_BHV_END /* housekeeping end-of-range */ +} vfs_bhv_t; + +#define VFS_POSITION_XFS (BHV_POSITION_BASE) +#define VFS_POSITION_DM (VFS_POSITION_BASE+10) +#define VFS_POSITION_QM (VFS_POSITION_BASE+20) +#define VFS_POSITION_IO (VFS_POSITION_BASE+30) + +#define VFS_RDONLY 0x0001 /* read-only vfs */ +#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ +#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ +#define VFS_UMOUNT 0x0008 /* unmount in progress */ +#define VFS_END 0x0008 /* max flag */ + +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_CLOSE 0x0002 /* close file system down */ +#define SYNC_DELWRI 0x0004 /* look at delayed writes */ +#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ +#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. 
superblocks) */ +#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ +#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ + +#define IGET_NOALLOC 0x0001 /* vfs_get_inode may return NULL */ + +typedef int (*xvfs_mount_t)(bhv_desc_t *, + struct xfs_mount_args *, struct cred *); +typedef int (*xvfs_parseargs_t)(bhv_desc_t *, char *, + struct xfs_mount_args *, int); +typedef int (*xvfs_showargs_t)(bhv_desc_t *, struct sbuf *); +typedef int (*xvfs_unmount_t)(bhv_desc_t *, int, struct cred *); +typedef int (*xvfs_mntupdate_t)(bhv_desc_t *, int *, + struct xfs_mount_args *); +typedef int (*xvfs_root_t)(bhv_desc_t *, struct xfs_vnode **); +typedef int (*xvfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct xfs_vnode *); +typedef int (*xvfs_sync_t)(bhv_desc_t *, int, struct cred *); +typedef int (*xvfs_vget_t)(bhv_desc_t *, struct xfs_vnode **, struct fid *); +typedef int (*xvfs_dmapiops_t)(bhv_desc_t *, caddr_t); +typedef int (*xvfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t); +typedef void (*xvfs_init_vnode_t)(bhv_desc_t *, + struct xfs_vnode *, bhv_desc_t *, int); +typedef void (*xvfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); +typedef struct inode * (*xvfs_get_inode_t)(bhv_desc_t *, xfs_ino_t, int); + +typedef struct xvfsops { + bhv_position_t xvfs_position; /* behavior chain position */ + xvfs_mount_t xvfs_mount; /* mount file system */ + xvfs_parseargs_t xvfs_parseargs; /* parse mount options */ + xvfs_showargs_t xvfs_showargs; /* unparse mount options */ + xvfs_unmount_t xvfs_unmount; /* unmount file system */ + xvfs_mntupdate_t xvfs_mntupdate; /* update file system options */ + xvfs_root_t xvfs_root; /* get root vnode */ + xvfs_statvfs_t xvfs_statvfs; /* file system statistics */ + xvfs_sync_t xvfs_sync; /* flush files */ + xvfs_vget_t xvfs_vget; /* get vnode from fid */ + xvfs_dmapiops_t xvfs_dmapiops; /* data migration */ + xvfs_quotactl_t xvfs_quotactl; /* disk quota */ + xvfs_get_inode_t xvfs_get_inode; /* bhv specific iget */ 
+ xvfs_init_vnode_t xvfs_init_vnode; /* initialize a new vnode */ + xvfs_force_shutdown_t xvfs_force_shutdown; /* crash and burn */ +} xvfsops_t; + +/* + * VFS's. Operates on vfs structure pointers (starts at bhv head). + */ +#define VHEAD(v) ((v)->vfs_fbhv) +#define XVFS_MOUNT(v, ma,cr, rv) ((rv) = xvfs_mount(VHEAD(v), ma,cr)) +#define XVFS_PARSEARGS(v, o,ma,f, rv) ((rv) = xvfs_parseargs(VHEAD(v), o,ma,f)) +#define XVFS_SHOWARGS(v, m, rv) ((rv) = xvfs_showargs(VHEAD(v), m)) +#define XVFS_UNMOUNT(v, f, cr, rv) ((rv) = xvfs_unmount(VHEAD(v), f,cr)) +#define XVFS_MNTUPDATE(v, fl, args, rv) ((rv) = xvfs_mntupdate(VHEAD(v), fl, args)) +#define XVFS_ROOT(v, vpp, rv) ((rv) = xvfs_root(VHEAD(v), vpp)) +#define XVFS_STATVFS(v, sp,vp, rv) ((rv) = xvfs_statvfs(VHEAD(v), sp,vp)) +#define XVFS_SYNC(v, flag,cr, rv) ((rv) = xvfs_sync(VHEAD(v), flag,cr)) +#define XVFS_VGET(v, vpp,fidp, rv) ((rv) = xvfs_vget(VHEAD(v), vpp,fidp)) +#define XVFS_DMAPIOPS(v, p, rv) ((rv) = xvfs_dmapiops(VHEAD(v), p)) +#define XVFS_QUOTACTL(v, c,id,p, rv) ((rv) = xvfs_quotactl(VHEAD(v), c,id,p)) +#define XVFS_GET_INODE(v, ino, fl) ( xvfs_get_inode(VHEAD(v), ino,fl) ) +#define XVFS_INIT_VNODE(v, vp,b,ul) ( xvfs_init_vnode(VHEAD(v), vp,b,ul) ) +#define XVFS_FORCE_SHUTDOWN(v, fl,f,l) ( xvfs_force_shutdown(VHEAD(v), fl,f,l) ) + +/* + * PVFS's. Operates on behavior descriptor pointers. 
+ */ +#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = xvfs_mount(b, ma,cr)) +#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = xvfs_parseargs(b, o,ma,f)) +#define PVFS_SHOWARGS(b, m, rv) ((rv) = xvfs_showargs(b, m)) +#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = xvfs_unmount(b, f,cr)) +#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = xvfs_mntupdate(b, fl, args)) +#define PVFS_ROOT(b, vpp, rv) ((rv) = xvfs_root(b, vpp)) +#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = xvfs_statvfs(b, sp,vp)) +#define PVFS_SYNC(b, flag,cr, rv) ((rv) = xvfs_sync(b, flag,cr)) +#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = xvfs_vget(b, vpp,fidp)) +#define PVFS_DMAPIOPS(b, p, rv) ((rv) = xvfs_dmapiops(b, p)) +#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = xvfs_quotactl(b, c,id,p)) +#define PVFS_GET_INODE(b, ino,fl) ( xvfs_get_inode(b, ino,fl) ) +#define PVFS_INIT_VNODE(b, vp,b2,ul) ( xvfs_init_vnode(b, vp,b2,ul) ) +#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( xvfs_force_shutdown(b, fl,f,l) ) + +extern int xvfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); +extern int xvfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); +extern int xvfs_showargs(bhv_desc_t *, struct sbuf *); +extern int xvfs_unmount(bhv_desc_t *, int, struct cred *); +extern int xvfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *); +extern int xvfs_root(bhv_desc_t *, struct xfs_vnode **); +extern int xvfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct xfs_vnode *); +extern int xvfs_sync(bhv_desc_t *, int, struct cred *); +extern int xvfs_vget(bhv_desc_t *, struct xfs_vnode **, struct fid *); +extern int xvfs_dmapiops(bhv_desc_t *, caddr_t); +extern int xvfs_quotactl(bhv_desc_t *, int, int, caddr_t); +extern struct inode *xvfs_get_inode(bhv_desc_t *, xfs_ino_t, int); +extern void xvfs_init_vnode(bhv_desc_t *, struct xfs_vnode *, bhv_desc_t *, int); +extern void xvfs_force_shutdown(bhv_desc_t *, int, char *, int); + +#define XFS_DMOPS "xfs_dm_operations" /* Data Migration */ +#define XFS_QMOPS "xfs_qm_operations" /* 
Quota Manager */ +#define XFS_IOOPS "xfs_io_operations" /* I/O subsystem */ +#define XFS_DM_MODULE "xfs_dmapi" +#define XFS_QM_MODULE "xfs_quota" +#define XFS_IO_MODULE "xfs_ioops" + +typedef struct bhv_vfsops { + struct xvfsops bhv_common; + void * bhv_custom; +} bhv_vfsops_t; + +typedef struct bhv_module { + bhv_desc_t bm_desc; + const char * bm_name; + bhv_vfsops_t * bm_ops; +} bhv_module_t; + +#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) ) +#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom ) +#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) +#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) + +extern xfs_vfs_t *vfs_allocate(struct mount *); +extern void vfs_deallocate(xfs_vfs_t *); +extern void vfs_insertops(xfs_vfs_t *, bhv_vfsops_t *); +extern void vfs_insertbhv(xfs_vfs_t *, bhv_desc_t *, xvfsops_t *, void *); + +#define bhv_lookup_module(n,m) ( (m) ? \ + inter_module_get_request(n, m) : \ + inter_module_get(n) ) +#define bhv_remove_module(n) inter_module_put(n) +#define bhv_module_init(n,m,op) inter_module_register(n,m,op) +#define bhv_module_exit(n) inter_module_unregister(n) + +extern void bhv_insert_all_vfsops(struct xfs_vfs *); +extern void bhv_remove_all_vfsops(struct xfs_vfs *, int); +extern void bhv_remove_vfsops(struct xfs_vfs *, int); + +#endif /* __XFS_VFS_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_vnode.c b/sys/gnu/fs/xfs/FreeBSD/xfs_vnode.c new file mode 100644 index 000000000000..ac85f877f093 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_vnode.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_imap.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode_item.h" +#include "xfs_inode.h" + +void +vn_init(void) +{ +} + +struct xfs_vnode * +vn_initialize( + xfs_vnode_t *vp) +{ + XFS_STATS_INC(vn_active); + XFS_STATS_INC(vn_alloc); + + /* Initialize the first behavior and the behavior chain head. 
*/ + vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); + +#ifdef CONFIG_XFS_VNODE_TRACING + vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); +#endif /* CONFIG_XFS_VNODE_TRACING */ + + vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address); + return vp; +} + +/* + * Get a reference on a vnode. Need to drop vnode reference + * to accomodate for vhold by VMAP regardless of whether or + * not we were able to successfully grab the vnode. + */ +xfs_vnode_t * +vn_get( + struct xfs_vnode *xfs_vp, + vmap_t *vmap) +{ + struct vnode *vp; + int error; + + XFS_STATS_INC(vn_get); + + vp = vmap->v_vp; + + error = vget(vp, 0, curthread); + if (error) { + vdrop(vp); + return (NULL); + } + + vdrop(vp); + if (vp->v_data != xfs_vp) { + vput(vp); + return (NULL); + } + + vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); + return xfs_vp; +} + +/* + * purge a vnode from the cache + * At this point the vnode is guaranteed to have no references (vn_count == 0) + * The caller has to make sure that there are no ways someone could + * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). + */ +void +vn_purge( + struct xfs_vnode *xfs_vp, + vmap_t *vmap) +{ + struct vnode *vp; + + vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); + + vp = vmap->v_vp; + + vn_lock(vp, LK_EXCLUSIVE, curthread); + vgone(vp); + VOP_UNLOCK(vp, 0, curthread); + vdrop(vp); +} + +/* + * Finish the removal of a vnode. + */ +void +vn_remove( + struct xfs_vnode *vp) +{ + vmap_t vmap; + + /* Make sure we don't do this to the same vnode twice */ + if (!(vp->v_fbhv)) + return; + + XFS_STATS_INC(vn_remove); + vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); + /* + * After the following purge the vnode + * will no longer exist. 
+ */ + VMAP(vp, vmap); + vn_purge(vp, &vmap); +} + + +#ifdef CONFIG_XFS_VNODE_TRACING + +#define KTRACE_ENTER(vp, vk, s, line, ra) \ + ktrace_enter( (vp)->v_trace, \ +/* 0 */ (void *)(__psint_t)(vk), \ +/* 1 */ (void *)(s), \ +/* 2 */ (void *)(__psint_t) line, \ +/* 3 */ (void *)(vn_count(vp)), \ +/* 4 */ (void *)(ra), \ +/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \ +/* 6 */ (void *)(__psint_t)smp_processor_id(), \ +/* 7 */ (void *)(__psint_t)(current->pid), \ +/* 8 */ (void *)__return_address, \ +/* 9 */ 0, 0, 0, 0, 0, 0, 0) + +/* + * Vnode tracing code. + */ +void +vn_trace_entry(xfs_vnode_t *vp, char *func, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra); +} + +void +vn_trace_exit(xfs_vnode_t *vp, char *func, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra); +} + +void +vn_trace_hold(xfs_vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra); +} + +void +vn_trace_ref(xfs_vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra); +} + +void +vn_trace_rele(xfs_vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra); +} +#endif /* CONFIG_XFS_VNODE_TRACING */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_vnode.h b/sys/gnu/fs/xfs/FreeBSD/xfs_vnode.h new file mode 100644 index 000000000000..d224cb86a0c0 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_vnode.h @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + * Portions Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef __XFS_VNODE_H__ +#define __XFS_VNODE_H__ + +#include <sys/vnode.h> +#include <sys/namei.h> + +struct xfs_iomap; +typedef xfs_ino_t vnumber_t; +typedef struct componentname vname_t; +typedef bhv_head_t vn_bhv_head_t; + +/* + * MP locking protocols: + * v_flag, v_vfsp VN_LOCK/VN_UNLOCK + * v_type read-only or fs-dependent + */ +typedef struct xfs_vnode { + __u32 v_flag; /* vnode flags (see below) */ + enum vtype v_type; /* vnode type */ + struct xfs_vfs *v_vfsp; /* ptr to containing VFS */ + vnumber_t v_number; /* in-core vnode number */ + vn_bhv_head_t v_bh; /* behavior head */ + struct vnode *v_vnode; /* FreeBSD vnode */ + struct xfs_inode *v_inode; /* XFS inode */ +#ifdef XFS_VNODE_TRACE + struct ktrace *v_trace; /* trace header structure */ +#endif +} xfs_vnode_t; + +#define v_fbhv v_bh.bh_first /* first behavior */ +#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ + +#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ +#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */ +#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. 
num */ + +typedef enum { + VN_BHV_UNKNOWN, /* not specified */ + VN_BHV_XFS, /* xfs */ + VN_BHV_DM, /* data migration */ + VN_BHV_QM, /* quota manager */ + VN_BHV_IO, /* IO path */ + VN_BHV_END /* housekeeping end-of-range */ +} vn_bhv_t; + +#define VNODE_POSITION_XFS (VNODE_POSITION_BASE) +#define VNODE_POSITION_DM (VNODE_POSITION_BASE+10) +#define VNODE_POSITION_QM (VNODE_POSITION_BASE+20) +#define VNODE_POSITION_IO (VNODE_POSITION_BASE+30) + +#define VPTOXFSVP(vp) ((struct xfs_vnode *)(vp)->v_data) + +/* + * Macros for dealing with the behavior descriptor inside of the vnode. + */ +#define BHV_TO_VNODE(bdp) ((xfs_vnode_t *)BHV_VOBJ(bdp)) +#define BHV_TO_VNODE_NULL(bdp) ((xfs_vnode_t *)BHV_VOBJNULL(bdp)) + +#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) +#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) +#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp) +#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops) +#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops) + +/* + * Vnode to Linux inode mapping. + */ +#define LINVFS_GET_VP(inode) ((xfs_vnode_t *)NULL) +#define LINVFS_GET_IP(vp) ((xfs_inode_t *)NULL) + +#ifndef __FreeBSD__ +/* + * Convert between vnode types and inode formats (since POSIX.1 + * defines mode word of stat structure in terms of inode formats). + */ +extern enum vtype iftovt_tab[]; +extern u_short vttoif_tab[]; +#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) +#define VTTOIF(indx) (vttoif_tab[(int)(indx)]) +#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) +#endif + + +/* + * Vnode flags. + */ +#define VINACT 0x1 /* vnode is being inactivated */ +#define VRECLM 0x2 /* vnode is being reclaimed */ +#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ +#define VMODIFIED 0x8 /* XFS inode state possibly differs */ + /* to the Linux inode state. */ + +/* + * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter. 
+ */ +typedef enum vrwlock { + VRWLOCK_NONE, + VRWLOCK_READ, + VRWLOCK_WRITE, + VRWLOCK_WRITE_DIRECT, + VRWLOCK_TRY_READ, + VRWLOCK_TRY_WRITE +} vrwlock_t; + +/* + * Return values for VOP_INACTIVE. A return value of + * VN_INACTIVE_NOCACHE implies that the file system behavior + * has disassociated its state and bhv_desc_t from the vnode. + */ +#define VN_INACTIVE_CACHE 0 +#define VN_INACTIVE_NOCACHE 1 + +/* + * Values for the cmd code given to VOP_VNODE_CHANGE. + */ +typedef enum vchange { + VCHANGE_FLAGS_FRLOCKS = 0, + VCHANGE_FLAGS_ENF_LOCKING = 1, + VCHANGE_FLAGS_TRUNCATED = 2, + VCHANGE_FLAGS_PAGE_DIRTY = 3, + VCHANGE_FLAGS_IOEXCL_COUNT = 4 +} vchange_t; + +struct file_lock; +struct xfs_iomap_s; +struct xfs_vattr; +struct attrlist_cursor_kern; + +typedef int (*xfs_vop_open_t)(bhv_desc_t *, struct cred *); +typedef ssize_t (*xfs_vop_read_t)(bhv_desc_t *, uio_t *, int, struct cred *); +typedef ssize_t (*xfs_vop_write_t)(bhv_desc_t *, uio_t *, int, struct cred *); +typedef int (*xfs_vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, + int, unsigned int, unsigned long); +typedef int (*xfs_vop_getattr_t)(bhv_desc_t *, struct xfs_vattr *, int, + struct cred *); +typedef int (*xfs_vop_setattr_t)(bhv_desc_t *, struct xfs_vattr *, int, + struct cred *); +typedef int (*xfs_vop_access_t)(bhv_desc_t *, int, struct cred *); +typedef int (*xfs_vop_lookup_t)(bhv_desc_t *, vname_t *, xfs_vnode_t **, + int, xfs_vnode_t *, struct cred *); +typedef int (*xfs_vop_create_t)(bhv_desc_t *, vname_t *, struct xfs_vattr *, + xfs_vnode_t **, struct cred *); +typedef int (*xfs_vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *); +typedef int (*xfs_vop_link_t)(bhv_desc_t *, xfs_vnode_t *, vname_t *, + struct cred *); +typedef int (*xfs_vop_rename_t)(bhv_desc_t *, vname_t *, xfs_vnode_t *, vname_t *, + struct cred *); +typedef int (*xfs_vop_mkdir_t)(bhv_desc_t *, vname_t *, struct xfs_vattr *, + xfs_vnode_t **, struct cred *); +typedef int (*xfs_vop_rmdir_t)(bhv_desc_t *, vname_t 
*, struct cred *); +typedef int (*xfs_vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *, + int *); +typedef int (*xfs_vop_symlink_t)(bhv_desc_t *, vname_t *, struct xfs_vattr *, + char *, xfs_vnode_t **, struct cred *); + +typedef int (*xfs_vop_readlink_t)(bhv_desc_t *, struct uio *, int, + struct cred *); +typedef int (*xfs_vop_fsync_t)(bhv_desc_t *, int, struct cred *, + xfs_off_t, xfs_off_t); +typedef int (*xfs_vop_inactive_t)(bhv_desc_t *, struct cred *); +typedef int (*xfs_vop_fid2_t)(bhv_desc_t *, struct fid *); +typedef int (*xfs_vop_release_t)(bhv_desc_t *); +typedef int (*xfs_vop_rwlock_t)(bhv_desc_t *, vrwlock_t); +typedef void (*xfs_vop_rwunlock_t)(bhv_desc_t *, vrwlock_t); +typedef int (*xfs_vop_frlock_t)(bhv_desc_t *, int, struct file_lock *,int, + xfs_off_t, struct cred *); +typedef int (*xfs_vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, + struct xfs_iomap *, int *); +typedef int (*xfs_vop_reclaim_t)(bhv_desc_t *); +typedef int (*xfs_vop_attr_get_t)(bhv_desc_t *, const char *, char *, int *, int, + struct cred *); +typedef int (*xfs_vop_attr_set_t)(bhv_desc_t *, const char *, char *, int, int, + struct cred *); +typedef int (*xfs_vop_attr_remove_t)(bhv_desc_t *, const char *, int, struct cred *); +typedef int (*xfs_vop_attr_list_t)(bhv_desc_t *, char *, int, int, + struct attrlist_cursor_kern *, struct cred *); +typedef void (*xfs_vop_link_removed_t)(bhv_desc_t *, xfs_vnode_t *, int); +typedef void (*xfs_vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t); +typedef void (*xfs_vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef void (*xfs_vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef int (*xfs_vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, + uint64_t, int); +typedef int (*xfs_vop_iflush_t)(bhv_desc_t *, int); + + +typedef struct xfs_vnodeops { + bhv_position_t vn_position; /* position within behavior chain */ + xfs_vop_open_t vop_open; + xfs_vop_read_t vop_read; + xfs_vop_write_t vop_write; + 
xfs_vop_ioctl_t vop_ioctl; + xfs_vop_getattr_t vop_getattr; + xfs_vop_setattr_t vop_setattr; + xfs_vop_access_t vop_access; + xfs_vop_lookup_t vop_lookup; + xfs_vop_create_t vop_create; + xfs_vop_remove_t vop_remove; + xfs_vop_link_t vop_link; + xfs_vop_rename_t vop_rename; + xfs_vop_mkdir_t vop_mkdir; + xfs_vop_rmdir_t vop_rmdir; + xfs_vop_readdir_t vop_readdir; + xfs_vop_symlink_t vop_symlink; + xfs_vop_readlink_t vop_readlink; + xfs_vop_fsync_t vop_fsync; + xfs_vop_inactive_t vop_inactive; + xfs_vop_fid2_t vop_fid2; + xfs_vop_rwlock_t vop_rwlock; + xfs_vop_rwunlock_t vop_rwunlock; + xfs_vop_frlock_t vop_frlock; + xfs_vop_bmap_t vop_bmap; + xfs_vop_reclaim_t vop_reclaim; + xfs_vop_attr_get_t vop_attr_get; + xfs_vop_attr_set_t vop_attr_set; + xfs_vop_attr_remove_t vop_attr_remove; + xfs_vop_attr_list_t vop_attr_list; + xfs_vop_link_removed_t vop_link_removed; + xfs_vop_vnode_change_t vop_vnode_change; + xfs_vop_ptossvp_t vop_tosspages; + xfs_vop_pflushinvalvp_t vop_flushinval_pages; + xfs_vop_pflushvp_t vop_flush_pages; + xfs_vop_release_t vop_release; + xfs_vop_iflush_t vop_iflush; +} xfs_vnodeops_t; + +/* + * VOP's. 
/*
 * VOP's.
 *
 * _VOP_() selects operation 'op' from the ops vector of the first behavior
 * on the vnode; each XVOP_* wrapper calls it with the first behavior
 * descriptor as the leading argument.  Wrappers with a trailing 'rv'
 * argument store the result there.  Like the XVFS_* and PVFS_* macros, the
 * whole assignment is parenthesized so that it stays a single expression
 * wherever the macro is used.
 */
#define _VOP_(op, vp)	(*((xfs_vnodeops_t *)(vp)->v_fops)->op)

#define XVOP_READ(vp,uio,ioflags,cr,rv)				\
	((rv) = _VOP_(vop_read, vp)((vp)->v_fbhv,uio,ioflags,cr))
/* 'file' is accepted for call-site compatibility but not passed on. */
#define XVOP_WRITE(vp,file,uio,ioflags,cr,rv)			\
	((rv) = _VOP_(vop_write, vp)((vp)->v_fbhv,uio,ioflags,cr))
#define XVOP_BMAP(vp,of,sz,rw,b,n,rv)				\
	((rv) = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n))
#define XVOP_OPEN(vp, cr, rv)					\
	((rv) = _VOP_(vop_open, vp)((vp)->v_fbhv, cr))
#define XVOP_GETATTR(vp, vap, f, cr, rv)			\
	((rv) = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr))
#define	XVOP_SETATTR(vp, vap, f, cr, rv)			\
	((rv) = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr))
#define	XVOP_ACCESS(vp, mode, cr, rv)				\
	((rv) = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr))
#define	XVOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv)			\
	((rv) = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr))
#define XVOP_CREATE(dvp,d,vap,vpp,cr,rv)			\
	((rv) = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr))
#define XVOP_REMOVE(dvp,d,cr,rv)				\
	((rv) = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr))
#define	XVOP_LINK(tdvp,fvp,d,cr,rv)				\
	((rv) = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr))
#define	XVOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv)			\
	((rv) = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr))
#define	XVOP_MKDIR(dp,d,vap,vpp,cr,rv)				\
	((rv) = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr))
#define	XVOP_RMDIR(dp,d,cr,rv)					\
	((rv) = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr))
#define	XVOP_READDIR(vp,uiop,cr,eofp,rv)			\
	((rv) = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp))
#define	XVOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv)			\
	((rv) = _VOP_(vop_symlink, dvp)((dvp)->v_fbhv,d,vap,tnm,vpp,cr))
#define	XVOP_READLINK(vp,uiop,fl,cr,rv)				\
	((rv) = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr))

#define	XVOP_FSYNC(vp,f,cr,b,e,rv)				\
	((rv) = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e))
#define XVOP_INACTIVE(vp, cr, rv)				\
	((rv) = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr))
#define XVOP_RELEASE(vp, rv)					\
	((rv) = _VOP_(vop_release, vp)((vp)->v_fbhv))
#define XVOP_FID2(vp, fidp, rv)					\
	((rv) = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp))
#define XVOP_RWLOCK(vp,i)					\
	((void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i))
#define XVOP_RWLOCK_TRY(vp,i)					\
	(_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i))
#define XVOP_RWUNLOCK(vp,i)					\
	((void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i))
#define XVOP_FRLOCK(vp,c,fl,flags,offset,fr,rv)			\
	((rv) = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr))
#define XVOP_RECLAIM(vp, rv)					\
	((rv) = _VOP_(vop_reclaim, vp)((vp)->v_fbhv))
#define XVOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv)	\
	((rv) = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred))
#define	XVOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv)	\
	((rv) = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred))
#define	XVOP_ATTR_REMOVE(vp, name, flags, cred, rv)		\
	((rv) = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred))
#define	XVOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv)	\
	((rv) = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred))
#define XVOP_LINK_REMOVED(vp, dvp, linkzero)			\
	((void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero))
#define XVOP_VNODE_CHANGE(vp, cmd, val)				\
	((void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val))
/*
 * These are page cache functions that now go thru VOPs.
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define XVOP_TOSS_PAGES(vp, first, last, fiopt)			\
	(_VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt))
/*
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define XVOP_FLUSHINVAL_PAGES(vp, first, last, fiopt)		\
	(_VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt))
/*
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define XVOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv)	\
	((rv) = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt))
#define XVOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv)		\
	((rv) = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg))
#define XVOP_IFLUSH(vp, flags, rv)				\
	((rv) = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags))

/*
 * Flags for read/write calls - select values from FreeBSD IO_ flags
 * or non-conflicting bits.
 */
#define IO_ISDIRECT	IO_DIRECT	/* bypass page cache */
#define IO_INVIS	0x02000		/* don't update inode timestamps */
#define IO_ISLOCKED	0x04000		/* don't do inode locking */

/*
 * Flags for VOP_IFLUSH call
 */
#define FLUSH_SYNC		1	/* wait for flush to complete	*/
#define FLUSH_INODE		2	/* flush the inode itself	*/
#define FLUSH_LOG		4	/* force the last log entry for
					 * this inode out to disk	*/

/*
 * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
 *	VOP_FLUSH_PAGES.
 */
#define FI_NONE			0	/* none */
#define FI_REMAPF		1	/* Do a remapf prior to the operation */
#define FI_REMAPF_LOCKED	2	/* Do a remapf prior to the operation.
					   Prevent VM access to the pages until
					   the operation completes. */
+ */ +typedef struct xfs_vattr { + int va_mask; /* bit-mask of attributes present */ + enum vtype va_type; /* vnode type (for create) */ + mode_t va_mode; /* file access mode and type */ + nlink_t va_nlink; /* number of references to file */ + uid_t va_uid; /* owner user id */ + gid_t va_gid; /* owner group id */ + xfs_ino_t va_nodeid; /* file id */ + xfs_off_t va_size; /* file size in bytes */ + u_long va_blocksize; /* blocksize preferred for i/o */ + struct timespec va_atime; /* time of last access */ + struct timespec va_mtime; /* time of last modification */ + struct timespec va_ctime; /* time file changed */ + u_int va_gen; /* generation number of file */ + xfs_dev_t va_rdev; /* device the special file represents */ + __int64_t va_nblocks; /* number of blocks allocated */ + u_long va_xflags; /* random extended file flags */ + u_long va_extsize; /* file extent size */ + u_long va_nextents; /* number of extents in file */ + u_long va_anextents; /* number of attr extents in file */ + int va_projid; /* project id */ +} xfs_vattr_t; + +/* + * setattr or getattr attributes + */ +#define XFS_AT_TYPE 0x00000001 +#define XFS_AT_MODE 0x00000002 +#define XFS_AT_UID 0x00000004 +#define XFS_AT_GID 0x00000008 +#define XFS_AT_FSID 0x00000010 +#define XFS_AT_NODEID 0x00000020 +#define XFS_AT_NLINK 0x00000040 +#define XFS_AT_SIZE 0x00000080 +#define XFS_AT_ATIME 0x00000100 +#define XFS_AT_MTIME 0x00000200 +#define XFS_AT_CTIME 0x00000400 +#define XFS_AT_RDEV 0x00000800 +#define XFS_AT_BLKSIZE 0x00001000 +#define XFS_AT_NBLOCKS 0x00002000 +#define XFS_AT_VCODE 0x00004000 +#define XFS_AT_MAC 0x00008000 +#define XFS_AT_UPDATIME 0x00010000 +#define XFS_AT_UPDMTIME 0x00020000 +#define XFS_AT_UPDCTIME 0x00040000 +#define XFS_AT_ACL 0x00080000 +#define XFS_AT_CAP 0x00100000 +#define XFS_AT_INF 0x00200000 +#define XFS_AT_XFLAGS 0x00400000 +#define XFS_AT_EXTSIZE 0x00800000 +#define XFS_AT_NEXTENTS 0x01000000 +#define XFS_AT_ANEXTENTS 0x02000000 +#define XFS_AT_PROJID 0x04000000 
#define XFS_AT_SIZE_NOPERM	(1 << 27)	/* 0x08000000 */
#define XFS_AT_GENCOUNT		(1 << 28)	/* 0x10000000 */

/*
 * Composite masks built from the single-attribute bits.
 */
#define XFS_AT_ALL	\
	(XFS_AT_TYPE | XFS_AT_MODE | XFS_AT_UID | XFS_AT_GID | \
	 XFS_AT_FSID | XFS_AT_NODEID | XFS_AT_NLINK | XFS_AT_SIZE | \
	 XFS_AT_ATIME | XFS_AT_MTIME | XFS_AT_CTIME | XFS_AT_RDEV | \
	 XFS_AT_BLKSIZE | XFS_AT_NBLOCKS | XFS_AT_VCODE | XFS_AT_MAC | \
	 XFS_AT_ACL | XFS_AT_CAP | XFS_AT_INF | XFS_AT_XFLAGS | \
	 XFS_AT_EXTSIZE | XFS_AT_NEXTENTS | XFS_AT_ANEXTENTS | \
	 XFS_AT_PROJID | XFS_AT_GENCOUNT)

#define XFS_AT_STAT	\
	(XFS_AT_TYPE | XFS_AT_MODE | XFS_AT_UID | XFS_AT_GID | \
	 XFS_AT_FSID | XFS_AT_NODEID | XFS_AT_NLINK | XFS_AT_SIZE | \
	 XFS_AT_ATIME | XFS_AT_MTIME | XFS_AT_CTIME | XFS_AT_RDEV | \
	 XFS_AT_BLKSIZE | XFS_AT_NBLOCKS | XFS_AT_PROJID)

#define XFS_AT_TIMES	\
	(XFS_AT_ATIME | XFS_AT_MTIME | XFS_AT_CTIME)

#define XFS_AT_UPDTIMES	\
	(XFS_AT_UPDATIME | XFS_AT_UPDMTIME | XFS_AT_UPDCTIME)

#define XFS_AT_NOSET	\
	(XFS_AT_NLINK | XFS_AT_RDEV | XFS_AT_FSID | XFS_AT_NODEID | \
	 XFS_AT_TYPE | XFS_AT_BLKSIZE | XFS_AT_NBLOCKS | XFS_AT_VCODE | \
	 XFS_AT_NEXTENTS | XFS_AT_ANEXTENTS | XFS_AT_GENCOUNT)

#ifndef __FreeBSD__
/*
 *  Modes.
 *  Only defined here when not building on FreeBSD.
 */
#define VSUID	S_ISUID		/* set user id on execution */
#define VSGID	S_ISGID		/* set group id on execution */
#define VSVTX	S_ISVTX		/* save swapped text even after use */
#define VREAD	S_IRUSR		/* read, write, execute permissions */
#define VWRITE	S_IWUSR
#define VEXEC	S_IXUSR
#endif /* __FreeBSD__ */

#define MODEMASK ALLPERMS	/* mode bits plus permission bits */
+ */ +#define MANDLOCK(vp, mode) \ + ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) + +extern void vn_init(void); +extern int vn_wait(struct xfs_vnode *); +extern xfs_vnode_t *vn_initialize(struct xfs_vnode *); + +/* + * Acquiring and invalidating vnodes: + * + * if (vn_get(vp, version, 0)) + * ...; + * vn_purge(vp, version); + * + * vn_get and vn_purge must be called with vmap_t arguments, sampled + * while a lock that the vnode's VOP_RECLAIM function acquires is + * held, to ensure that the vnode sampled with the lock held isn't + * recycled (VOP_RECLAIMed) or deallocated between the release of the lock + * and the subsequent vn_get or vn_purge. + */ + +/* + * vnode_map structures _must_ match vn_epoch and vnode structure sizes. + */ +typedef struct vnode_map { + xfs_vfs_t *v_vfsp; + xfs_ino_t v_ino; + struct vnode *v_vp; +} vmap_t; + +#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp; \ + (vmap).v_vp = (vp)->v_vnode; \ + (vmap).v_ino = (vp)->v_inode->i_ino;\ + vhold((vp)->v_vnode); \ + } + +extern void vn_purge(struct xfs_vnode *, vmap_t *); +extern xfs_vnode_t *vn_get(struct xfs_vnode *, vmap_t *); +extern int vn_revalidate(struct xfs_vnode *); +extern void vn_remove(struct xfs_vnode *); + +static inline int vn_count(struct xfs_vnode *vp) +{ + return vp->v_vnode->v_usecount; +} + +/* + * Vnode reference counting functions (and macros for compatibility). + */ +extern xfs_vnode_t *vn_hold(struct xfs_vnode *); +extern void vn_rele(struct xfs_vnode *); + +#if defined(XFS_VNODE_TRACE) +#define VN_HOLD(vp) \ + ((void)vref((vp)->v_vnode), \ + vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address)) +#define VN_RELE(vp) \ + (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \ + vrele((vp)->v_vnode)) +#else +#define VN_HOLD(vp) vref((vp)->v_vnode) +#define VN_RELE(vp) vrele((vp)->v_vnode) +#endif + +/* + * Vname handling macros. 
+ */ +#define VNAME(cnp) ((cnp)->cn_nameptr) +#define VNAMELEN(cnp) ((cnp)->cn_namelen) +#define VNAME_TO_VNODE(dentry) (printf("VNAME_TO_VNODE NI"), (xfs_vnode_t *)0) + +/* + * Vnode spinlock manipulation. + */ +#define VN_LOCK(vp) VI_LOCK(vp->v_vnode) +#define VN_UNLOCK(vp, s) VI_UNLOCK(vp->v_vnode) +#define VN_FLAGSET(vp,b) vn_flagset(vp,b) +#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b) + +static __inline__ void vn_flagset(struct xfs_vnode *vp, __u32 flag) +{ + VN_LOCK(vp); + vp->v_flag |= flag; + VN_UNLOCK(vp, 0); +} + +static __inline__ void vn_flagclr(struct xfs_vnode *vp, __u32 flag) +{ + VN_LOCK(vp); + vp->v_flag &= ~flag; + VN_UNLOCK(vp, 0); +} + +/* + * Update modify/access/change times on the vnode + */ +#define VN_MTIMESET(vp, tvp) +#define VN_ATIMESET(vp, tvp) +#define VN_CTIMESET(vp, tvp) + +/* + * Some useful predicates. + */ +#define VN_MAPPED(vp) 0 +#define VN_CACHED(vp) 0 +#define VN_DIRTY(vp) 0 +#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) +#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED) + +/* + * Flags to VOP_SETATTR/VOP_GETATTR. + */ +#define ATTR_UTIME 0x01 /* non-default utime(2) request */ +#define ATTR_DMI 0x08 /* invocation from a DMI function */ +#define ATTR_LAZY 0x80 /* set/get attributes lazily */ +#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */ + +/* + * Flags to VOP_FSYNC and VOP_RECLAIM. + */ +#define FSYNC_NOWAIT 0 /* asynchronous flush */ +#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ +#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ +#define FSYNC_DATA 0x4 /* synchronous fsync of data only */ + +/* + * Tracking vnode activity. 
+ */ +#if defined(XFS_VNODE_TRACE) + +#define VNODE_TRACE_SIZE 16 /* number of trace entries */ +#define VNODE_KTRACE_ENTRY 1 +#define VNODE_KTRACE_EXIT 2 +#define VNODE_KTRACE_HOLD 3 +#define VNODE_KTRACE_REF 4 +#define VNODE_KTRACE_RELE 5 + +extern void vn_trace_entry(struct xfs_vnode *, char *, inst_t *); +extern void vn_trace_exit(struct xfs_vnode *, char *, inst_t *); +extern void vn_trace_hold(struct xfs_vnode *, char *, int, inst_t *); +extern void vn_trace_ref(struct xfs_vnode *, char *, int, inst_t *); +extern void vn_trace_rele(struct xfs_vnode *, char *, int, inst_t *); + +#define VN_TRACE(vp) \ + vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address) +#else +#define vn_trace_entry(a,b,c) +#define vn_trace_exit(a,b,c) +#define vn_trace_hold(a,b,c,d) +#define vn_trace_ref(a,b,c,d) +#define vn_trace_rele(a,b,c,d) +#define VN_TRACE(vp) +#endif + +#endif /* __XFS_VNODE_H__ */ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c b/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c new file mode 100644 index 000000000000..dc9f6f698834 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c @@ -0,0 +1,1431 @@ +/* + * Copyright (c) 2001, Alexander Kabaev + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/kernel.h> +#include <sys/fcntl.h> +#include <sys/mount.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/dirent.h> +#include <sys/ioccom.h> +#include <sys/malloc.h> +#include <sys/extattr.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> +#include <vm/vnode_pager.h> + +#include <fs/fifofs/fifo.h> + +#define NO_VFS_MACROS +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_imap.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_attr.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode_item.h" +#include "xfs_inode.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_iomap.h" +#include "xfs_clnt.h" +#include "xfs_mountops.h" + +/* + * Prototypes for XFS vnode operations. 
+ */ +static vop_access_t _xfs_access; +static vop_advlock_t _xfs_advlock; +static vop_bmap_t _xfs_bmap; +static vop_cachedlookup_t _xfs_cachedlookup; +static vop_close_t _xfs_close; +static vop_create_t _xfs_create; +static vop_fsync_t _xfs_fsync; +static vop_getattr_t _xfs_getattr; +static vop_getextattr_t _xfs_getextattr; +static vop_inactive_t _xfs_inactive; +static vop_ioctl_t _xfs_ioctl; +static vop_link_t _xfs_link; +static vop_listextattr_t _xfs_listextattr; +static vop_mkdir_t _xfs_mkdir; +static vop_mknod_t _xfs_mknod; +static vop_open_t _xfs_open; +static vop_read_t _xfs_read; +static vop_readdir_t _xfs_readdir; +static vop_readlink_t _xfs_readlink; +static vop_reclaim_t _xfs_reclaim; +static vop_remove_t _xfs_remove; +static vop_rename_t _xfs_rename; +static vop_rmdir_t _xfs_rmdir; +static vop_setattr_t _xfs_setattr; +static vop_strategy_t _xfs_strategy; +static vop_symlink_t _xfs_symlink; +static vop_write_t _xfs_write; + +struct vop_vector xfs_vnops = { + .vop_default = &default_vnodeops, + .vop_access = _xfs_access, + .vop_advlock = _xfs_advlock, + .vop_bmap = _xfs_bmap, + .vop_cachedlookup = _xfs_cachedlookup, + .vop_close = _xfs_close, + .vop_create = _xfs_create, + .vop_fsync = _xfs_fsync, + .vop_getattr = _xfs_getattr, + .vop_getextattr = _xfs_getextattr, + .vop_inactive = _xfs_inactive, + .vop_ioctl = _xfs_ioctl, + .vop_link = _xfs_link, + .vop_listextattr = _xfs_listextattr, + .vop_lookup = vfs_cache_lookup, + .vop_mkdir = _xfs_mkdir, + .vop_mknod = _xfs_mknod, + .vop_open = _xfs_open, + .vop_read = _xfs_read, + .vop_readdir = _xfs_readdir, + .vop_readlink = _xfs_readlink, + .vop_reclaim = _xfs_reclaim, + .vop_remove = _xfs_remove, + .vop_rename = _xfs_rename, + .vop_rmdir = _xfs_rmdir, + .vop_setattr = _xfs_setattr, + .vop_strategy = _xfs_strategy, + .vop_symlink = _xfs_symlink, + .vop_write = _xfs_write, +}; + +/* + * FIFO's specific operations. 
+ */ + +static vop_close_t _xfsfifo_close; +static vop_read_t _xfsfifo_read; +static vop_kqfilter_t _xfsfifo_kqfilter; +static vop_write_t _xfsfifo_write; + +struct vop_vector xfs_fifoops = { + .vop_default = &fifo_specops, + .vop_access = _xfs_access, + .vop_close = _xfsfifo_close, + .vop_fsync = _xfs_fsync, + .vop_getattr = _xfs_getattr, + .vop_inactive = _xfs_inactive, + .vop_kqfilter = _xfsfifo_kqfilter, + .vop_read = _xfsfifo_read, + .vop_reclaim = _xfs_reclaim, + .vop_setattr = _xfs_setattr, + .vop_write = _xfsfifo_write, +}; + +static int +_xfs_access( + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + int error; + + XVOP_ACCESS(VPTOXFSVP(ap->a_vp), ap->a_mode, ap->a_cred, error); + return (error); +} + +static int +_xfs_open( + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct thread *a_td; + int a_fdidx; + } */ *ap) +{ + int error; + + XVOP_OPEN(VPTOXFSVP(ap->a_vp), ap->a_cred, error); + if (error == 0) + vnode_create_vobject(ap->a_vp, 0, ap->a_td); + return (error); +} + +static int +_xfs_close( + struct vop_close_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + int error = 0; + /* XVOP_CLOSE(VPTOXFSVP(ap->a_vp), NULL, error); */ + return (error); +} + +static int +_xfs_getattr( + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + struct mount *mp; + xfs_vattr_t va; + int error; + + + VATTR_NULL(vap); + memset(&va,0,sizeof(xfs_vattr_t)); + va.va_mask = XFS_AT_STAT|XFS_AT_GENCOUNT|XFS_AT_XFLAGS; + + XVOP_GETATTR(VPTOXFSVP(vp), &va, 0, ap->a_cred, error); + if (error) return (error); + + mp = vp->v_mount; + + vap->va_type = va.va_type; + vap->va_mode = va.va_mode; + vap->va_nlink = va.va_nlink; + 
vap->va_uid = va.va_uid; + vap->va_gid = va.va_gid; + vap->va_fsid = mp->mnt_stat.f_fsid.val[0]; + vap->va_fileid = va.va_nodeid; + vap->va_size = va.va_size; + vap->va_blocksize = va.va_blocksize; + vap->va_atime = va.va_atime; + vap->va_mtime = va.va_mtime; + vap->va_ctime = va.va_ctime; + vap->va_gen = va.va_gen; + vap->va_rdev = va.va_rdev; + vap->va_bytes = (va.va_nblocks << BBSHIFT); + + /* XFS now supports devices that have block sizes + * other than 512 so BBSHIFT will work for now + * but need to get this value from the super block + */ + + /* + * Fields with no direct equivalent in XFS + * leave initialized by VATTR_NULL + */ +#if 0 + vap->va_filerev = 0; + vap->va_birthtime = va.va_ctime; + vap->va_vaflags = 0; + vap->va_flags = 0; + vap->va_spare = 0; +#endif + + return (0); +} + +static int +_xfs_setattr( + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + xfs_vattr_t va; + int error; + + /* + * Check for unsettable attributes. 
+ */ + if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || + (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || + (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || + ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) + return (EINVAL); + + memset(&va, 0, sizeof(va)); + + if (vap->va_uid != (uid_t)VNOVAL) { + va.va_mask |= XFS_AT_UID; + va.va_uid = vap->va_uid; + } + if (vap->va_gid != (gid_t)VNOVAL) { + va.va_mask |= XFS_AT_GID; + va.va_gid = vap->va_gid; + } + if (vap->va_size != VNOVAL) { + va.va_mask |= XFS_AT_SIZE; + va.va_size = vap->va_size; + } + if (vap->va_atime.tv_sec != VNOVAL) { + va.va_mask |= XFS_AT_ATIME; + va.va_atime = vap->va_atime; + } + if (vap->va_mtime.tv_sec != VNOVAL) { + va.va_mask |= XFS_AT_MTIME; + va.va_mtime = vap->va_mtime; + } + if (vap->va_ctime.tv_sec != VNOVAL) { + va.va_mask |= XFS_AT_CTIME; + va.va_ctime = vap->va_ctime; + } + if (vap->va_mode != (mode_t)VNOVAL) { + va.va_mask |= XFS_AT_MODE; + va.va_mode = vap->va_mode; + } + + XVOP_SETATTR(VPTOXFSVP(vp), &va, 0, ap->a_cred, error); + return (error); +} + +static int +_xfs_inactive( + struct vop_inactive_args /* { + struct vnode *a_vp; + struct thread *a_td; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = ap->a_td; + int error; + + XVOP_INACTIVE(VPTOXFSVP(vp), td->td_ucred, error); + return (error); +} + +static int +_xfs_read( + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + int error; + + switch (vp->v_type) { + case VREG: + break; + case VDIR: + return (EISDIR); + default: + return (EPERM); + }; + + XVOP_READ(VPTOXFSVP(vp), uio, ap->a_ioflag, ap->a_cred, error); + return error; +} + +int +xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag); + +int +xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag) +{ + xfs_fileoff_t lbn, nextlbn; + 
xfs_fsize_t bytesinfile; + long size, xfersize, blkoffset; + struct buf *bp; + struct vnode *vp; + int error, orig_resid; + int seqcount; + + seqcount = ioflag >> IO_SEQSHIFT; + + orig_resid = uio->uio_resid; + if (orig_resid <= 0) + return (0); + + vp = XFS_ITOV(ip)->v_vnode; + + /* + * Ok so we couldn't do it all in one vm trick... + * so cycle around trying smaller bites.. + */ + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_d.di_size - uio->uio_offset) <= 0) + break; + + lbn = XFS_B_TO_FSBT(mp, uio->uio_offset); + nextlbn = lbn + 1; + + /* + * size of buffer. The buffer representing the + * end of the file is rounded up to the size of + * the block type ( fragment or full block, + * depending ). + */ + size = mp->m_sb.sb_blocksize; + blkoffset = XFS_B_FSB_OFFSET(mp, uio->uio_offset); + + /* + * The amount we want to transfer in this iteration is + * one FS block less the amount of the data before + * our startpoint (duh!) + */ + xfersize = mp->m_sb.sb_blocksize - blkoffset; + + /* + * But if we actually want less than the block, + * or the file doesn't have a whole block more of data, + * then use the lesser number. + */ + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (XFS_FSB_TO_B(mp, nextlbn) >= ip->i_d.di_size ) { + /* + * Don't do readahead if this is the end of the file. + */ + error = bread(vp, lbn, size, NOCRED, &bp); + } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { + /* + * Otherwise if we are allowed to cluster, + * grab as much as we can. + * + * XXX This may not be a win if we are not + * doing sequential access. + */ + error = cluster_read(vp, ip->i_d.di_size, lbn, + size, NOCRED, uio->uio_resid, seqcount, &bp); + } else if (seqcount > 1) { + /* + * If we are NOT allowed to cluster, then + * if we appear to be acting sequentially, + * fire off a request for a readahead + * as well as a read. 
Note that the 4th and 5th + * arguments point to arrays of the size specified in + * the 6th argument. + */ + int nextsize = mp->m_sb.sb_blocksize; + error = breadn(vp, lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } else { + /* + * Failing all of the above, just read what the + * user asked for. Interestingly, the same as + * the first option above. + */ + error = bread(vp, lbn, size, NOCRED, &bp); + } + if (error) { + brelse(bp); + bp = NULL; + break; + } + + /* + * If IO_DIRECT then set B_DIRECT for the buffer. This + * will cause us to attempt to release the buffer later on + * and will cause the buffer cache to attempt to free the + * underlying pages. + */ + if (ioflag & IO_DIRECT) + bp->b_flags |= B_DIRECT; + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + + /* + * otherwise use the general form + */ + error = uiomove((char *)bp->b_data + blkoffset, + (int)xfersize, uio); + + if (error) + break; + + if (ioflag & (IO_VMIO|IO_DIRECT) ) { + /* + * If there are no dependencies, and it's VMIO, + * then we don't need the buf, mark it available + * for freeing. The VM has the data. + */ + bp->b_flags |= B_RELBUF; + brelse(bp); + } else { + /* + * Otherwise let whoever + * made the request take care of + * freeing it. We just queue + * it onto another list. + */ + bqrelse(bp); + } + } + + /* + * This can only happen in the case of an error + * because the loop above resets bp to NULL on each iteration + * and on normal completion has not set a new value into it. 
+ * so it must have come from a 'break' statement + */ + if (bp != NULL) { + if (ioflag & (IO_VMIO|IO_DIRECT)) { + bp->b_flags |= B_RELBUF; + brelse(bp); + } else + bqrelse(bp); + } + + return (error); +} + +static int +_xfs_write(struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; +/* struct uio *uio = ap->a_uio; */ + + if (vp->v_type != VREG) + return (EPERM); + return (EPERM); +} + +static int +_xfs_create( + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct thread *td = curthread; + struct ucred *credp = td->td_ucred; + struct componentname *cnp = ap->a_cnp; + xfs_vnode_t *xvp; + xfs_vattr_t va; + int error; + + memset(&va, 0, sizeof (va)); + va.va_mask |= XFS_AT_MODE; + va.va_mode = vap->va_mode; + va.va_mask |= XFS_AT_TYPE; + va.va_type = vap->va_type; + + xvp = NULL; + XVOP_CREATE(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error); + + if (error == 0) { + *ap->a_vpp = xvp->v_vnode; + VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE, td); + } + + return (error); +} + +static int +_xfs_remove( + struct vop_remove_args /* { + struct vnodeop_desc *a_desc; + struct vnode * a_dvp; + struct vnode * a_vp; + struct componentname * a_cnp; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + /* + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + */ + int error; + + if (vp->v_type == VDIR || vp->v_usecount != 1) + return (EPERM); + + error = 0; + cache_purge(vp); + return (error); +} + +static int +_xfs_rename( + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap) +{ + struct vnode *fvp = ap->a_fvp; + struct vnode *tvp = ap->a_tvp; + struct vnode *fdvp = 
ap->a_fdvp; + struct vnode *tdvp = ap->a_tdvp; +/* struct componentname *tcnp = ap->a_tcnp; */ +/* struct componentname *fcnp = ap->a_fcnp;*/ + int error = EPERM; + + if (error) + goto out; + + /* Check for cross-device rename */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out; + } + + if (tvp && tvp->v_usecount > 1) { + error = EBUSY; + goto out; + } + + if (fvp->v_type == VDIR) { + if (tvp != NULL && tvp->v_type == VDIR) + cache_purge(tdvp); + cache_purge(fdvp); + } +out: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + vgone(fvp); + if (tvp) + vgone(tvp); + return (error); +} + +static int +_xfs_link( + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap) +{ + xfs_vnode_t *tdvp, *vp; + int error; + + tdvp = VPTOXFSVP(ap->a_tdvp); + vp = VPTOXFSVP(ap->a_vp); + XVOP_LINK(tdvp, vp, ap->a_cnp, NULL, error); + return (error); +} + +static int +_xfs_symlink( + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap) +{ + struct thread *td = curthread; + struct ucred *credp = td->td_ucred; + xfs_vnode_t *xvp; + xfs_vattr_t va; + int error; + + memset(&va, 0, sizeof (va)); + + va.va_mask |= XFS_AT_MODE; + va.va_mode = ap->a_vap->va_mode; + va.va_mask |= XFS_AT_TYPE; + va.va_type = ap->a_vap->va_type; + + XVOP_SYMLINK(VPTOXFSVP(ap->a_dvp), ap->a_cnp, &va, ap->a_target, + &xvp, credp, error); + + if (error == 0) { + *ap->a_vpp = xvp->v_vnode; + VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE, td); + } + + return (error); +} + +static int +_xfs_mknod( + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct thread *td = curthread; + 
struct ucred *credp = td->td_ucred; + struct componentname *cnp = ap->a_cnp; + xfs_vnode_t *xvp; + xfs_vattr_t va; + int error; + + memset(&va, 0, sizeof (va)); + va.va_mask |= XFS_AT_MODE; + va.va_mode = vap->va_mode; + va.va_mask |= XFS_AT_TYPE; + va.va_type = vap->va_type; + va.va_mask |= XFS_AT_RDEV; + va.va_rdev = vap->va_rdev; + + xvp = NULL; + XVOP_CREATE(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error); + + if (error == 0) { + *ap->a_vpp = xvp->v_vnode; + VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE, td); + } + + return (error); +} + +static int +_xfs_mkdir( + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct thread *td = curthread; + struct ucred *credp = td->td_ucred; + struct componentname *cnp = ap->a_cnp; + xfs_vnode_t *xvp; + xfs_vattr_t va; + int error; + + memset(&va, 0, sizeof (va)); + va.va_mask |= XFS_AT_MODE; + va.va_mode = vap->va_mode; + va.va_mask |= XFS_AT_TYPE; + va.va_type = vap->va_type; + + xvp = NULL; + XVOP_MKDIR(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error); + + if (error == 0) { + *ap->a_vpp = xvp->v_vnode; + VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE, td); + } + + return (error); +} + +static int +_xfs_rmdir( + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; +/* struct componentname *cnp = ap->a_cnp; */ + int error; + + if (dvp == vp) + return (EINVAL); + + error = EPERM; + + return (error); +} + +static int +_xfs_readdir( + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *a_ncookies; + u_long **a_cookies; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + int error; + off_t off; + int eof = 0; + + if (vp->v_type != VDIR) + return (EPERM); + if 
(ap->a_ncookies) { + return (EOPNOTSUPP); + } + + error = 0; + while (!eof){ + off = (int)uio->uio_offset; + + XVOP_READDIR(VPTOXFSVP(vp), uio, NULL, &eof, error); + if ((uio->uio_offset == off) || error) { + break; + } + } + + if (ap->a_eofflag) + *ap->a_eofflag = (eof != 0); + + return (error); +} + + +static int +_xfs_readlink( + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + struct ucred *cred = ap->a_cred; + int error; + + XVOP_READLINK(VPTOXFSVP(vp), uio, 0, cred, error); + return (error); +} + +static int +_xfs_fsync( + struct vop_fsync_args /* { + struct vnode * a_vp; + int a_waitfor; + struct thread * a_td; + } */ *ap) +{ + xfs_vnode_t *vp = VPTOXFSVP(ap->a_vp); + int flags = FSYNC_DATA; + int error; + + if (ap->a_waitfor == MNT_WAIT) + flags |= FSYNC_WAIT; + XVOP_FSYNC(vp, flags, ap->a_td->td_ucred, (xfs_off_t)-1, (xfs_off_t)-1, + error); + + return (error); +} + +static int +_xfs_bmap( + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct bufobj **a_bop; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap) +{ + xfs_iomap_t iomap; + xfs_off_t offset; + ssize_t size; + struct mount *mp; + struct xfs_mount *xmp; + struct xfs_vnode *xvp; + int error, maxrun, retbm; + + mp = ap->a_vp->v_mount; + xmp = XFS_VFSTOM(MNTTOVFS(mp)); + if (ap->a_bop != NULL) + *ap->a_bop = &xmp->m_ddev_targp->specvp->v_bufobj; + if (ap->a_bnp == NULL) + return (0); + + xvp = VPTOXFSVP(ap->a_vp); + retbm = 1; + + offset = XFS_FSB_TO_B(xmp, ap->a_bn); + size = XFS_FSB_TO_B(xmp, 1); + XVOP_BMAP(xvp, offset, size, BMAPI_READ, &iomap, &retbm, error); + if (error) + return (error); + if (retbm == 0 || iomap.iomap_bn == IOMAP_DADDR_NULL) { + *ap->a_bnp = (daddr_t)-1; + if (ap->a_runb) + *ap->a_runb = 0; + if (ap->a_runp) + *ap->a_runp = 0; + } else { + *ap->a_bnp = iomap.iomap_bn + btodb(iomap.iomap_delta); + maxrun = 
mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; + if (ap->a_runb) { + *ap->a_runb = XFS_B_TO_FSB(xmp, iomap.iomap_delta); + if (*ap->a_runb > maxrun) + *ap->a_runb = maxrun; + } + if (ap->a_runp) { + *ap->a_runp = + XFS_B_TO_FSB(xmp, iomap.iomap_bsize + - iomap.iomap_delta - size); + if (*ap->a_runp > maxrun) + *ap->a_runp = maxrun; + } + } + return (0); +} + +static int +_xfs_strategy( + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap) +{ + daddr_t blkno; + struct buf *bp;; + struct bufobj *bo; + struct vnode *vp; + struct xfs_mount *xmp; + int error; + + bp = ap->a_bp; + vp = ap->a_vp; + + KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)", + __func__, ap->a_vp, ap->a_bp->b_vp)); + if (bp->b_blkno == bp->b_lblkno) { + error = VOP_BMAP(vp, bp->b_lblkno, NULL, &blkno, NULL, NULL); + bp->b_blkno = blkno; + bp->b_iooffset = (blkno << BBSHIFT); + if (error) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return (error); + } + if ((long)bp->b_blkno == -1) + vfs_bio_clrbuf(bp); + } + if ((long)bp->b_blkno == -1) { + bufdone(bp); + return (0); + } + + xmp = XFS_VFSTOM(MNTTOVFS(vp->v_mount)); + bo = &xmp->m_ddev_targp->specvp->v_bufobj; + bo->bo_ops->bop_strategy(bo, bp); + return (0); +} + +int +_xfs_ioctl( + struct vop_ioctl_args /* { + struct vnode *a_vp; + u_long a_command; + caddr_t a_data; + int fflag; + struct ucred *cred; + struct thread *a_td; + } */ *ap) +{ +/* struct vnode *vp = ap->a_vp; */ +/* struct thread *p = ap->a_td; */ +/* struct file *fp; */ + int error; + + switch (ap->a_command) { + default: + error = EINVAL; + } + return (error); +} + +int +_xfs_advlock( + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap) +{ +/* struct vnode *vp = ap->a_vp;*/ + struct flock *fl = ap->a_fl; +/* caddr_t id = (caddr_t)1 */ /* ap->a_id */; +/* int flags = ap->a_flags; */ + off_t start, end, size; + int error/* , lkop */; + + /*KAN: 
temp */ + return (EOPNOTSUPP); + + size = 0; + error = 0; + switch (fl->l_whence) { + case SEEK_SET: + case SEEK_CUR: + start = fl->l_start; + break; + case SEEK_END: + start = fl->l_start + size; + default: + return (EINVAL); + } + if (start < 0) + return (EINVAL); + if (fl->l_len == 0) + end = -1; + else { + end = start + fl->l_len - 1; + if (end < start) + return (EINVAL); + } +#ifdef notyet + switch (ap->a_op) { + case F_SETLK: + error = lf_advlock(ap, &np->n_lockf, size); + break; + case F_UNLCK: + lf_advlock(ap, &np->n_lockf, size); + break; + case F_GETLK: + error = lf_advlock(ap, &np->n_lockf, size); + break; + default: + return (EINVAL); + } +#endif + return (error); +} + +static int +_xfs_cachedlookup( + struct vop_cachedlookup_args /* { + struct vnode * a_dvp; + struct vnode ** a_vpp; + struct componentname * a_cnp; + } */ *ap) +{ + struct vnode *dvp, *tvp; + struct xfs_vnode *cvp; + int islastcn; + int error; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + struct thread *td = cnp->cn_thread; + + char *pname = cnp->cn_nameptr; + int namelen = cnp->cn_namelen; + + *vpp = NULL; + dvp = ap->a_dvp; + islastcn = flags & ISLASTCN; + + XVOP_LOOKUP(VPTOXFSVP(dvp), cnp, &cvp, 0, NULL, cred, error); + + if (error == ENOENT) { + if ((nameiop == CREATE || nameiop == RENAME || + nameiop == DELETE) && islastcn) + { + error = VOP_ACCESS(dvp, VWRITE, cred, td); + if (error) + return (error); + cnp->cn_flags |= SAVENAME; + return (EJUSTRETURN); + } + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(dvp, *vpp, cnp); + return (error); + } + if (error) + return (error); + + tvp = cvp->v_vnode; + + if (nameiop == DELETE && islastcn) { + if ((error = vn_lock(tvp, LK_EXCLUSIVE, td))) { + vrele(tvp); + goto err_out; + } + *vpp = tvp; + + /* Directory should be writable for deletes. 
*/ + error = VOP_ACCESS(dvp, VWRITE, cred, td); + if (error) + goto err_out; + + /* XXXKAN: Permission checks for sticky dirs? */ + return (0); + } + + if (nameiop == RENAME && islastcn) { + if ((error = vn_lock(tvp, LK_EXCLUSIVE, td))) { + vrele(tvp); + goto err_out; + } + *vpp = tvp; + + if ((error = VOP_ACCESS(dvp, VWRITE, cred, td))) + goto err_out; + return (0); + } + + if (flags & ISDOTDOT) { + VOP_UNLOCK(dvp, 0, td); + error = vn_lock(tvp, cnp->cn_lkflags, td); + if (error) { + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); + vrele(tvp); + goto err_out; + } + *vpp = tvp; + } else if (namelen == 1 && pname[0] == '.') { + *vpp = tvp; + KASSERT(tvp == dvp, ("not same directory")); + } else { + if ((error = vn_lock(tvp, cnp->cn_lkflags, td))) { + vrele(tvp); + goto err_out; + } + *vpp = tvp; + } + + if (cnp->cn_flags & MAKEENTRY) + cache_enter(dvp, *vpp, cnp); + return (0); + +err_out: + if (*vpp != 0) + vput(*vpp); + return (error); +} + +static int +_xfs_reclaim( + struct vop_reclaim_args /* { + struct vnode *a_vp; + struct thread *a_td; + } */ *ap) +{ + + struct vnode *vp = ap->a_vp; + struct xfs_vnode *xfs_vp = VPTOXFSVP(vp); + int error; + + XVOP_RECLAIM(xfs_vp, error); + kmem_free(xfs_vp, sizeof(*xfs_vp)); + vp->v_data = NULL; + return (error); +} + +static int +_xfs_kqfilter( + struct vop_kqfilter_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct knote *a_kn; + } */ *ap) +{ + return (0); +} + +static __inline +struct xfs_inode * +xfs_vtoi(struct vnode *vp) +{ + if (VPTOXFSVP(vp) != 0) + return (XFS_BHVTOI(VPTOXFSVP(vp)->v_fbhv)); + return (NULL); +} + +/* + * Read wrapper for fifos. 
+ */ +static int +_xfsfifo_read( + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap) +{ + int error, resid; + struct xfs_inode *ip; + struct uio *uio; + + uio = ap->a_uio; + resid = uio->uio_resid; + error = fifo_specops.vop_read(ap); + ip = xfs_vtoi(ap->a_vp); + if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL && + (uio->uio_resid != resid || (error == 0 && resid != 0))) + xfs_ichgtime(ip, XFS_ICHGTIME_ACC); + return (error); +} + +/* + * Write wrapper for fifos. + */ +static int +_xfsfifo_write( + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap) +{ + int error, resid; + struct uio *uio; + struct xfs_inode *ip; + + uio = ap->a_uio; + resid = uio->uio_resid; + error = fifo_specops.vop_write(ap); + ip = xfs_vtoi(ap->a_vp); + if (ip != NULL && (uio->uio_resid != resid || + (error == 0 && resid != 0))) + xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + return (error); +} + +/* + * Close wrapper for fifos. + * + * Update the times on the inode then do device close. + */ +static int +_xfsfifo_close( + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + + return (fifo_specops.vop_close(ap)); +} + +/* + * Kqfilter wrapper for fifos. 
+ * + * Fall through to ufs kqfilter routines if needed + */ +static int +_xfsfifo_kqfilter( + struct vop_kqfilter_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct knote *a_kn; + } */ *ap) +{ + int error; + + error = fifo_specops.vop_kqfilter(ap); + if (error) + error = _xfs_kqfilter(ap); + return (error); +} + +static int +_xfs_getextattr( + struct vop_getextattr_args /* { + struct vnode *a_vp; + int a_attrnamespace; + const char *a_name; + struct uio *a_uio; + size_t *a_size; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + int error; + char *value; + int size; + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VREAD); + if (error) + return (error); + + size = ATTR_MAX_VALUELEN; + value = (char *)kmem_zalloc(size, KM_SLEEP); + if (value == NULL) + return (ENOMEM); + + XVOP_ATTR_GET(VPTOXFSVP(ap->a_vp), ap->a_name, value, &size, 1, + ap->a_cred, error); + + if (ap->a_uio != NULL) { + if (ap->a_uio->uio_iov->iov_len < size) + error = ERANGE; + else + uiomove(value, size, ap->a_uio); + } + + if (ap->a_size != NULL) + *ap->a_size = size; + + kmem_free(value, ATTR_MAX_VALUELEN); + return (error); +} + +static int +_xfs_listextattr( + struct vop_listextattr_args /* { + struct vnode *a_vp; + int a_attrnamespace; + struct uio *a_uio; + size_t *a_size; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap) +{ + int error; + char *buf = NULL; + int buf_len = 0; + attrlist_cursor_kern_t cursor = { 0 }; + int i; + char name_len; + int attrnames_len = 0; + int xfs_flags = ATTR_KERNAMELS; + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VREAD); + if (error) + return (error); + + if (ap->a_attrnamespace & EXTATTR_NAMESPACE_USER) + xfs_flags |= ATTR_KERNORMALS; + + if (ap->a_attrnamespace & EXTATTR_NAMESPACE_SYSTEM) + xfs_flags |= ATTR_KERNROOTLS; + + if (ap->a_uio == NULL || ap->a_uio->uio_iov[0].iov_base == NULL) { + xfs_flags |= ATTR_KERNOVAL; + buf_len = 0; + } 
else { + buf = ap->a_uio->uio_iov[0].iov_base; + buf_len = ap->a_uio->uio_iov[0].iov_len; + } + + XVOP_ATTR_LIST(VPTOXFSVP(ap->a_vp), buf, buf_len, xfs_flags, + &cursor, ap->a_cred, error); + if (error < 0) { + attrnames_len = -error; + error = 0; + } + if (buf == NULL) + goto done; + + /* + * extattr_list expects a list of names. Each list + * entry consists of one byte for the name length, followed + * by the name (not null terminated) + */ + name_len=0; + for(i=attrnames_len-1; i > 0 ; --i) { + buf[i] = buf[i-1]; + if (buf[i]) + ++name_len; + else { + buf[i] = name_len; + name_len = 0; + } + } + buf[0] = name_len; + + if (ap->a_uio != NULL) + ap->a_uio->uio_resid -= attrnames_len; + +done: + if (ap->a_size != NULL) + *ap->a_size = attrnames_len; + + return (error); +} diff --git a/sys/gnu/fs/xfs/FreeBSD/xfsdmapistubs.c b/sys/gnu/fs/xfs/FreeBSD/xfsdmapistubs.c new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfsdmapistubs.c diff --git a/sys/gnu/fs/xfs/FreeBSD/xfsquotasstubs.c b/sys/gnu/fs/xfs/FreeBSD/xfsquotasstubs.c new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfsquotasstubs.c diff --git a/sys/gnu/fs/xfs/FreeBSD/xfsrtstubs.c b/sys/gnu/fs/xfs/FreeBSD/xfsrtstubs.c new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/sys/gnu/fs/xfs/FreeBSD/xfsrtstubs.c diff --git a/sys/gnu/fs/xfs/xfs.h b/sys/gnu/fs/xfs/xfs.h new file mode 100644 index 000000000000..8fd62ca69e61 --- /dev/null +++ b/sys/gnu/fs/xfs/xfs.h @@ -0,0 +1,39 @@ + +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_H__ +#define __XFS_H__ + +#include <xfs_freebsd.h> +#include <xfs_fs.h> + +#endif /* __XFS_H__ */ diff --git a/sys/gnu/fs/xfs/xfs_acl.c b/sys/gnu/fs/xfs/xfs_acl.c new file mode 100644 index 000000000000..e33e77394c02 --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_acl.c @@ -0,0 +1,978 @@ +/* + * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_inum.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_acl.h" +#include "xfs_mac.h" +#include "xfs_attr.h" + +#include <linux/posix_acl_xattr.h> + +STATIC int xfs_acl_setmode(xfs_vnode_t *, xfs_acl_t *, int *); +STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); +STATIC void xfs_acl_get_endian(xfs_acl_t *); +STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); +STATIC int xfs_acl_invalid(xfs_acl_t *); +STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); +STATIC void xfs_acl_get_attr(xfs_vnode_t *, xfs_acl_t *, int, int, int *); +STATIC void xfs_acl_set_attr(xfs_vnode_t *, xfs_acl_t *, int, int *); +STATIC int xfs_acl_allow_set(xfs_vnode_t *, int); + +kmem_zone_t *xfs_acl_zone; + + +/* + * Test for existence of access ACL attribute as efficiently as possible. 
+ */ +int +xfs_acl_vhasacl_access( + xfs_vnode_t *vp) +{ + int error; + + xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error); + return (error == 0); +} + +/* + * Test for existence of default ACL attribute as efficiently as possible. + */ +int +xfs_acl_vhasacl_default( + xfs_vnode_t *vp) +{ + int error; + + if (vp->v_type != VDIR) + return 0; + xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); + return (error == 0); +} + +/* + * Convert from extended attribute representation to in-memory for XFS. + */ +STATIC int +posix_acl_xattr_to_xfs( + posix_acl_xattr_header *src, + size_t size, + xfs_acl_t *dest) +{ + posix_acl_xattr_entry *src_entry; + xfs_acl_entry_t *dest_entry; + int n; + + if (!src || !dest) + return EINVAL; + + if (size < sizeof(posix_acl_xattr_header)) + return EINVAL; + + if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) + return EINVAL; + + memset(dest, 0, sizeof(xfs_acl_t)); + dest->acl_cnt = posix_acl_xattr_count(size); + if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES) + return EINVAL; + + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + if (!dest->acl_cnt) + return 0; + + src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src)); + dest_entry = &dest->acl_entry[0]; + + for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) { + dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm); + if (_ACL_PERM_INVALID(dest_entry->ae_perm)) + return EINVAL; + dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag); + switch(dest_entry->ae_tag) { + case ACL_USER: + case ACL_GROUP: + dest_entry->ae_id = le32_to_cpu(src_entry->e_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + dest_entry->ae_id = ACL_UNDEFINED_ID; + break; + default: + return EINVAL; + } + } + if (xfs_acl_invalid(dest)) + return EINVAL; + + return 0; +} + +/* + * Comparison function called from qsort(). 
+ * Primary key is ae_tag, secondary key is ae_id. + */ +STATIC int +xfs_acl_entry_compare( + const void *va, + const void *vb) +{ + xfs_acl_entry_t *a = (xfs_acl_entry_t *)va, + *b = (xfs_acl_entry_t *)vb; + + if (a->ae_tag == b->ae_tag) + return (a->ae_id - b->ae_id); + return (a->ae_tag - b->ae_tag); +} + +/* + * Convert from in-memory XFS to extended attribute representation. + */ +STATIC int +posix_acl_xfs_to_xattr( + xfs_acl_t *src, + posix_acl_xattr_header *dest, + size_t size) +{ + int n; + size_t new_size = posix_acl_xattr_size(src->acl_cnt); + posix_acl_xattr_entry *dest_entry; + xfs_acl_entry_t *src_entry; + + if (size < new_size) + return -ERANGE; + + /* Need to sort src XFS ACL by <ae_tag,ae_id> */ + qsort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]), + xfs_acl_entry_compare); + + dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); + dest_entry = &dest->a_entries[0]; + src_entry = &src->acl_entry[0]; + for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) { + dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm); + if (_ACL_PERM_INVALID(src_entry->ae_perm)) + return -EINVAL; + dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag); + switch (src_entry->ae_tag) { + case ACL_USER: + case ACL_GROUP: + dest_entry->e_id = cpu_to_le32(src_entry->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + default: + return -EINVAL; + } + } + return new_size; +} + +int +xfs_acl_vget( + xfs_vnode_t *vp, + void *acl, + size_t size, + int kind) +{ + int error; + xfs_acl_t *xfs_acl = NULL; + posix_acl_xattr_header *ext_acl = acl; + int flags = 0; + + VN_HOLD(vp); + if ((error = _MAC_VACCESS(vp, NULL, VREAD))) + goto out; + if(size) { + if (!(_ACL_ALLOC(xfs_acl))) { + error = ENOMEM; + goto out; + } + memset(xfs_acl, 0, sizeof(xfs_acl_t)); + } else + flags = ATTR_KERNOVAL; + + xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error); + if (error) + goto 
out; + + if (!size) { + error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES); + } else { + if (xfs_acl_invalid(xfs_acl)) { + error = EINVAL; + goto out; + } + if (kind == _ACL_TYPE_ACCESS) { + xfs_vattr_t va; + + va.va_mask = XFS_AT_MODE; + XVOP_GETATTR(vp, &va, 0, sys_cred, error); + if (error) + goto out; + xfs_acl_sync_mode(va.va_mode, xfs_acl); + } + error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); + } +out: + VN_RELE(vp); + if(xfs_acl) + _ACL_FREE(xfs_acl); + return -error; +} + +int +xfs_acl_vremove( + xfs_vnode_t *vp, + int kind) +{ + int error; + + VN_HOLD(vp); + error = xfs_acl_allow_set(vp, kind); + if (!error) { + XVOP_ATTR_REMOVE(vp, kind == _ACL_TYPE_DEFAULT? + SGI_ACL_DEFAULT: SGI_ACL_FILE, + ATTR_ROOT, sys_cred, error); + if (error == ENOATTR) + error = 0; /* 'scool */ + } + VN_RELE(vp); + return -error; +} + +int +xfs_acl_vset( + xfs_vnode_t *vp, + void *acl, + size_t size, + int kind) +{ + posix_acl_xattr_header *ext_acl = acl; + xfs_acl_t *xfs_acl; + int error; + int basicperms = 0; /* more than std unix perms? */ + + if (!acl) + return -EINVAL; + + if (!(_ACL_ALLOC(xfs_acl))) + return -ENOMEM; + + error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl); + if (error) { + _ACL_FREE(xfs_acl); + return -error; + } + if (!xfs_acl->acl_cnt) { + _ACL_FREE(xfs_acl); + return 0; + } + + VN_HOLD(vp); + error = xfs_acl_allow_set(vp, kind); + if (error) + goto out; + + /* Incoming ACL exists, set file mode based on its value */ + if (kind == _ACL_TYPE_ACCESS) + xfs_acl_setmode(vp, xfs_acl, &basicperms); + + /* + * If we have more than std unix permissions, set up the actual attr. + * Otherwise, delete any existing attr. This prevents us from + * having actual attrs for permissions that can be stored in the + * standard permission bits. 
+ */ + if (!basicperms) { + xfs_acl_set_attr(vp, xfs_acl, kind, &error); + } else { + xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); + } + + +out: + VN_RELE(vp); + _ACL_FREE(xfs_acl); + return -error; +} + +int +xfs_acl_iaccess( + xfs_inode_t *ip, + mode_t mode, + cred_t *cr) +{ + xfs_acl_t *acl; + int error; + + if (!(_ACL_ALLOC(acl))) + return -1; + + /* If the file has no ACL return -1. */ + if (xfs_attr_fetch(ip, SGI_ACL_FILE, (char *)acl, sizeof(xfs_acl_t))) { + _ACL_FREE(acl); + return -1; + } + xfs_acl_get_endian(acl); + + /* If the file has an empty ACL return -1. */ + if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) { + _ACL_FREE(acl); + return -1; + } + + /* Synchronize ACL with mode bits */ + xfs_acl_sync_mode(ip->i_d.di_mode, acl); + + error = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr); + _ACL_FREE(acl); + return error; +} + +STATIC int +xfs_acl_allow_set( + xfs_vnode_t *vp, + int kind) +{ + xfs_vattr_t va; + int error; + + if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) + return EPERM; + if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) + return ENOTDIR; + if (vp->v_vfsp->vfs_flag & VFS_RDONLY) + return EROFS; + if ((error = _MAC_VACCESS(vp, NULL, VWRITE))) + return error; + va.va_mask = XFS_AT_UID; + XVOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return error; + if (va.va_uid != current->fsuid && !capable(CAP_FOWNER)) + return EPERM; + return error; +} + +/* + * Look for any effective exec access, to allow CAP_DAC_OVERRIDE for exec. + * Ignore checking for exec in USER_OBJ when there is no mask, because + * in this "minimal acl" case we don't have any actual acls, and we + * won't even be here. 
+ */ +STATIC int +xfs_acl_find_any_exec( + xfs_acl_t *fap) +{ + int i; + int masked_aces = 0; + int mask = 0; + + for (i = 0; i < fap->acl_cnt; i++) { + if (fap->acl_entry[i].ae_perm & ACL_EXECUTE) { + if (fap->acl_entry[i].ae_tag & (ACL_USER_OBJ|ACL_OTHER)) + return 1; + + if (fap->acl_entry[i].ae_tag == ACL_MASK) + mask = fap->acl_entry[i].ae_perm; + else + masked_aces |= fap->acl_entry[i].ae_perm; + + if ((mask & masked_aces) & ACL_EXECUTE) + return 1; + } + } + + return 0; +} + +/* + * The access control process to determine the access permission: + * if uid == file owner id, use the file owner bits. + * if gid == file owner group id, use the file group bits. + * scan ACL for a maching user or group, and use matched entry + * permission. Use total permissions of all matching group entries, + * until all acl entries are exhausted. The final permission produced + * by matching acl entry or entries needs to be & with group permission. + * if not owner, owning group, or matching entry in ACL, use file + * other bits. Don't allow CAP_DAC_OVERRIDE on exec access unless + * there is some effective exec access somewhere. + */ +STATIC int +xfs_acl_capability_check( + mode_t mode, + cred_t *cr, + xfs_acl_t *fap) +{ + if ((mode & ACL_READ) && !capable_cred(cr, CAP_DAC_READ_SEARCH)) + return EACCES; + if ((mode & ACL_WRITE) && !capable_cred(cr, CAP_DAC_OVERRIDE)) + return EACCES; + if ((mode & ACL_EXECUTE) && + (!capable_cred(cr, CAP_DAC_OVERRIDE) || + !xfs_acl_find_any_exec(fap))) { + return EACCES; + } + + return 0; +} + +/* + * Note: cr is only used here for the capability check if the ACL test fails. + * It is not used to find out the credentials uid or groups etc, as was + * done in IRIX. It is assumed that the uid and groups for the current + * thread are taken from "current" instead of the cr parameter. 
+ */ +STATIC int +xfs_acl_access( + uid_t fuid, + gid_t fgid, + xfs_acl_t *fap, + mode_t md, + cred_t *cr) +{ + xfs_acl_entry_t matched; + int i, allows; + int maskallows = -1; /* true, but not 1, either */ + int seen_userobj = 0; + + matched.ae_tag = 0; /* Invalid type */ + md >>= 6; /* Normalize the bits for comparison */ + + for (i = 0; i < fap->acl_cnt; i++) { + /* + * Break out if we've got a user_obj entry or + * a user entry and the mask (and have processed USER_OBJ) + */ + if (matched.ae_tag == ACL_USER_OBJ) + break; + if (matched.ae_tag == ACL_USER) { + if (maskallows != -1 && seen_userobj) + break; + if (fap->acl_entry[i].ae_tag != ACL_MASK && + fap->acl_entry[i].ae_tag != ACL_USER_OBJ) + continue; + } + /* True if this entry allows the requested access */ + allows = ((fap->acl_entry[i].ae_perm & md) == md); + + switch (fap->acl_entry[i].ae_tag) { + case ACL_USER_OBJ: + seen_userobj = 1; + if (fuid != current->fsuid) + continue; + matched.ae_tag = ACL_USER_OBJ; + matched.ae_perm = allows; + break; + case ACL_USER: + if (fap->acl_entry[i].ae_id != current->fsuid) + continue; + matched.ae_tag = ACL_USER; + matched.ae_perm = allows; + break; + case ACL_GROUP_OBJ: + if ((matched.ae_tag == ACL_GROUP_OBJ || + matched.ae_tag == ACL_GROUP) && !allows) + continue; + if (!in_group_p(fgid)) + continue; + matched.ae_tag = ACL_GROUP_OBJ; + matched.ae_perm = allows; + break; + case ACL_GROUP: + if ((matched.ae_tag == ACL_GROUP_OBJ || + matched.ae_tag == ACL_GROUP) && !allows) + continue; + if (!in_group_p(fap->acl_entry[i].ae_id)) + continue; + matched.ae_tag = ACL_GROUP; + matched.ae_perm = allows; + break; + case ACL_MASK: + maskallows = allows; + break; + case ACL_OTHER: + if (matched.ae_tag != 0) + continue; + matched.ae_tag = ACL_OTHER; + matched.ae_perm = allows; + break; + } + } + /* + * First possibility is that no matched entry allows access. + * The capability to override DAC may exist, so check for it. 
+ */ + switch (matched.ae_tag) { + case ACL_OTHER: + case ACL_USER_OBJ: + if (matched.ae_perm) + return 0; + break; + case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + if (maskallows && matched.ae_perm) + return 0; + break; + case 0: + break; + } + + return xfs_acl_capability_check(md, cr, fap); +} + +/* + * ACL validity checker. + * This acl validation routine checks each ACL entry read in makes sense. + */ +STATIC int +xfs_acl_invalid( + xfs_acl_t *aclp) +{ + xfs_acl_entry_t *entry, *e; + int user = 0, group = 0, other = 0, mask = 0; + int mask_required = 0; + int i, j; + + if (!aclp) + goto acl_invalid; + + if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES) + goto acl_invalid; + + for (i = 0; i < aclp->acl_cnt; i++) { + entry = &aclp->acl_entry[i]; + switch (entry->ae_tag) { + case ACL_USER_OBJ: + if (user++) + goto acl_invalid; + break; + case ACL_GROUP_OBJ: + if (group++) + goto acl_invalid; + break; + case ACL_OTHER: + if (other++) + goto acl_invalid; + break; + case ACL_USER: + case ACL_GROUP: + for (j = i + 1; j < aclp->acl_cnt; j++) { + e = &aclp->acl_entry[j]; + if (e->ae_id == entry->ae_id && + e->ae_tag == entry->ae_tag) + goto acl_invalid; + } + mask_required++; + break; + case ACL_MASK: + if (mask++) + goto acl_invalid; + break; + default: + goto acl_invalid; + } + } + if (!user || !group || !other || (mask_required && !mask)) + goto acl_invalid; + else + return 0; +acl_invalid: + return EINVAL; +} + +/* + * Do ACL endian conversion. + */ +STATIC void +xfs_acl_get_endian( + xfs_acl_t *aclp) +{ + xfs_acl_entry_t *ace, *end; + + INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); + end = &aclp->acl_entry[0]+aclp->acl_cnt; + for (ace = &aclp->acl_entry[0]; ace < end; ace++) { + INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag); + INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id); + INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm); + } +} + +/* + * Get the ACL from the EA and do endian conversion. 
+ */ +STATIC void +xfs_acl_get_attr( + xfs_vnode_t *vp, + xfs_acl_t *aclp, + int kind, + int flags, + int *error) +{ + int len = sizeof(xfs_acl_t); + + ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); + flags |= ATTR_ROOT; + XVOP_ATTR_GET(vp, + kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT, + (char *)aclp, &len, flags, sys_cred, *error); + if (*error || (flags & ATTR_KERNOVAL)) + return; + xfs_acl_get_endian(aclp); +} + +/* + * Set the EA with the ACL and do endian conversion. + */ +STATIC void +xfs_acl_set_attr( + xfs_vnode_t *vp, + xfs_acl_t *aclp, + int kind, + int *error) +{ + xfs_acl_entry_t *ace, *newace, *end; + xfs_acl_t *newacl; + int len; + + if (!(_ACL_ALLOC(newacl))) { + *error = ENOMEM; + return; + } + + len = sizeof(xfs_acl_t) - + (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt)); + end = &aclp->acl_entry[0]+aclp->acl_cnt; + for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0]; + ace < end; + ace++, newace++) { + INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag); + INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id); + INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); + } + INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); + XVOP_ATTR_SET(vp, + kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT, + (char *)newacl, len, ATTR_ROOT, sys_cred, *error); + _ACL_FREE(newacl); +} + +int +xfs_acl_vtoacl( + xfs_vnode_t *vp, + xfs_acl_t *access_acl, + xfs_acl_t *default_acl) +{ + xfs_vattr_t va; + int error = 0; + + if (access_acl) { + /* + * Get the Access ACL and the mode. If either cannot + * be obtained for some reason, invalidate the access ACL. + */ + xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); + if (!error) { + /* Got the ACL, need the mode... */ + va.va_mask = XFS_AT_MODE; + XVOP_GETATTR(vp, &va, 0, sys_cred, error); + } + + if (error) + access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; + else /* We have a good ACL and the file mode, synchronize. 
*/ + xfs_acl_sync_mode(va.va_mode, access_acl); + } + + if (default_acl) { + xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error); + if (error) + default_acl->acl_cnt = XFS_ACL_NOT_PRESENT; + } + return error; +} + +/* + * This function retrieves the parent directory's acl, processes it + * and lets the child inherit the acl(s) that it should. + */ +int +xfs_acl_inherit( + xfs_vnode_t *vp, + xfs_vattr_t *vap, + xfs_acl_t *pdaclp) +{ + xfs_acl_t *cacl; + int error = 0; + int basicperms = 0; + + /* + * If the parent does not have a default ACL, or it's an + * invalid ACL, we're done. + */ + if (!vp) + return 0; + if (!pdaclp || xfs_acl_invalid(pdaclp)) + return 0; + + /* + * Copy the default ACL of the containing directory to + * the access ACL of the new file and use the mode that + * was passed in to set up the correct initial values for + * the u::,g::[m::], and o:: entries. This is what makes + * umask() "work" with ACL's. + */ + + if (!(_ACL_ALLOC(cacl))) + return ENOMEM; + + memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); + xfs_acl_filter_mode(vap->va_mode, cacl); + xfs_acl_setmode(vp, cacl, &basicperms); + + /* + * Set the Default and Access ACL on the file. The mode is already + * set on the file, so we don't need to worry about that. + * + * If the new file is a directory, its default ACL is a copy of + * the containing directory's default ACL. + */ + if (vp->v_type == VDIR) + xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); + if (!error && !basicperms) + xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); + _ACL_FREE(cacl); + return error; +} + +/* + * Set up the correct mode on the file based on the supplied ACL. This + * makes sure that the mode on the file reflects the state of the + * u::,g::[m::], and o:: entries in the ACL. Since the mode is where + * the ACL is going to get the permissions for these entries, we must + * synchronize the mode whenever we set the ACL on a file. 
+ */ +STATIC int +xfs_acl_setmode( + xfs_vnode_t *vp, + xfs_acl_t *acl, + int *basicperms) +{ + xfs_vattr_t va; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + int i, error, nomask = 1; + + *basicperms = 1; + + if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) + return 0; + + /* + * Copy the u::, g::, o::, and m:: bits from the ACL into the + * mode. The m:: bits take precedence over the g:: bits. + */ + va.va_mask = XFS_AT_MODE; + XVOP_GETATTR(vp, &va, 0, sys_cred, error); + if (error) + return error; + + va.va_mask = XFS_AT_MODE; + va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); + ap = acl->acl_entry; + for (i = 0; i < acl->acl_cnt; ++i) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + va.va_mode |= ap->ae_perm << 6; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: /* more than just standard modes */ + nomask = 0; + va.va_mode |= ap->ae_perm << 3; + *basicperms = 0; + break; + case ACL_OTHER: + va.va_mode |= ap->ae_perm; + break; + default: /* more than just standard modes */ + *basicperms = 0; + break; + } + ap++; + } + + /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + va.va_mode |= gap->ae_perm << 3; + + XVOP_SETATTR(vp, &va, 0, sys_cred, error); + return error; +} + +/* + * The permissions for the special ACL entries (u::, g::[m::], o::) are + * actually stored in the file mode (if there is both a group and a mask, + * the group is stored in the ACL entry and the mask is stored on the file). + * This allows the mode to remain automatically in sync with the ACL without + * the need for a call-back to the ACL system at every point where the mode + * could change. This function takes the permissions from the specified mode + * and places it in the supplied ACL. + * + * This implementation draws its validity from the fact that, when the ACL + * was assigned, the mode was copied from the ACL. 
+ * If the mode did not change, therefore, the mode remains exactly what was + * taken from the special ACL entries at assignment. + * If a subsequent chmod() was done, the POSIX spec says that the change in + * mode must cause an update to the ACL seen at user level and used for + * access checks. Before and after a mode change, therefore, the file mode + * most accurately reflects what the special ACL entries should permit/deny. + * + * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly, + * the existing mode bits will override whatever is in the + * ACL. Similarly, if there is a pre-existing ACL that was + * never in sync with its mode (owing to a bug in 6.5 and + * before), it will now magically (or mystically) be + * synchronized. This could cause slight astonishment, but + * it is better than inconsistent permissions. + * + * The supplied ACL is a template that may contain any combination + * of special entries. These are treated as place holders when we fill + * out the ACL. This routine does not add or remove special entries, it + * simply unites each special entry with its associated set of permissions. + */ +STATIC void +xfs_acl_sync_mode( + mode_t mode, + xfs_acl_t *acl) +{ + int i, nomask = 1; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + + /* + * Set ACL entries. POSIX1003.1eD16 requires that the MASK + * be set instead of the GROUP entry, if there is a MASK. 
+ */ + for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + ap->ae_perm = (mode >> 6) & 0x7; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: + nomask = 0; + ap->ae_perm = (mode >> 3) & 0x7; + break; + case ACL_OTHER: + ap->ae_perm = mode & 0x7; + break; + default: + break; + } + } + /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + gap->ae_perm = (mode >> 3) & 0x7; +} + +/* + * When inheriting an Access ACL from a directory Default ACL, + * the ACL bits are set to the intersection of the ACL default + * permission bits and the file permission bits in mode. If there + * are no permission bits on the file then we must not give them + * the ACL. This is what what makes umask() work with ACLs. + */ +STATIC void +xfs_acl_filter_mode( + mode_t mode, + xfs_acl_t *acl) +{ + int i, nomask = 1; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + + /* + * Set ACL entries. POSIX1003.1eD16 requires that the MASK + * be merged with GROUP entry, if there is a MASK. + */ + for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + ap->ae_perm &= (mode >> 6) & 0x7; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: + nomask = 0; + ap->ae_perm &= (mode >> 3) & 0x7; + break; + case ACL_OTHER: + ap->ae_perm &= mode & 0x7; + break; + default: + break; + } + } + /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + gap->ae_perm &= (mode >> 3) & 0x7; +} + diff --git a/sys/gnu/fs/xfs/xfs_acl.h b/sys/gnu/fs/xfs/xfs_acl.h new file mode 100644 index 000000000000..4da8136d4e4f --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_acl.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2001-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA 94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */
#ifndef __XFS_ACL_H__
#define __XFS_ACL_H__

/*
 * Access Control Lists
 */
typedef __uint16_t	xfs_acl_perm_t;
typedef __int32_t	xfs_acl_type_t;
typedef __int32_t	xfs_acl_tag_t;
typedef __int32_t	xfs_acl_id_t;

/* Capacity of the acl_entry[] array in struct xfs_acl. */
#define XFS_ACL_MAX_ENTRIES 25
/* acl_cnt value marking an ACL as absent. */
#define XFS_ACL_NOT_PRESENT (-1)

/* One ACL entry: tag, id and permission bits. */
typedef struct xfs_acl_entry {
	xfs_acl_tag_t	ae_tag;
	xfs_acl_id_t	ae_id;
	xfs_acl_perm_t	ae_perm;
} xfs_acl_entry_t;

/* A complete ACL: entry count followed by a fixed-size entry array. */
typedef struct xfs_acl {
	__int32_t	acl_cnt;
	xfs_acl_entry_t	acl_entry[XFS_ACL_MAX_ENTRIES];
} xfs_acl_t;

/* On-disk XFS extended attribute names */
#define SGI_ACL_FILE	"SGI_ACL_FILE"
#define SGI_ACL_DEFAULT	"SGI_ACL_DEFAULT"
/* Name lengths without the terminating NUL. */
#define SGI_ACL_FILE_SIZE	(sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE	(sizeof(SGI_ACL_DEFAULT)-1)


#ifdef __KERNEL__

#ifdef CONFIG_XFS_POSIX_ACL

struct xfs_vattr;
struct xfs_vnode;
struct xfs_inode;

extern int xfs_acl_inherit(struct xfs_vnode *, struct xfs_vattr *, xfs_acl_t *);
extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
extern int xfs_acl_get(struct xfs_vnode *, xfs_acl_t *, xfs_acl_t *);
extern int xfs_acl_set(struct xfs_vnode *, xfs_acl_t *, xfs_acl_t *);
extern int xfs_acl_vtoacl(struct xfs_vnode *, xfs_acl_t *, xfs_acl_t *);
extern int xfs_acl_vhasacl_access(struct xfs_vnode *);
extern int xfs_acl_vhasacl_default(struct xfs_vnode *);
extern int xfs_acl_vset(struct xfs_vnode *, void *, size_t, int);
extern int xfs_acl_vget(struct xfs_vnode *, void *, size_t, int);
extern int xfs_acl_vremove(struct xfs_vnode *vp, int);

extern struct kmem_zone *xfs_acl_zone;

/* ACL kinds passed as the "kind" argument of the xfs_acl_v*() calls. */
#define _ACL_TYPE_ACCESS	1
#define _ACL_TYPE_DEFAULT	2
/* Non-zero when perm has bits other than read, write and execute. */
#define _ACL_PERM_INVALID(perm)	((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))

/* Allocation helpers backed by xfs_acl_zone. */
#define _ACL_DECL(a)		xfs_acl_t *(a) = NULL
#define _ACL_ALLOC(a)		((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
#define _ACL_FREE(a)		((a)? kmem_zone_free(xfs_acl_zone, (a)) : 0)
#define _ACL_ZONE_INIT(z,name)	((z) = kmem_zone_init(sizeof(xfs_acl_t), name))
#define _ACL_ZONE_DESTROY(z)	(kmem_cache_destroy(z))
#define _ACL_INHERIT(c,v,d)	(xfs_acl_inherit(c,v,d))
/* Both evaluate to true when xfs_acl_vtoacl() returned 0. */
#define _ACL_GET_ACCESS(pv,pa)	(xfs_acl_vtoacl(pv,pa,NULL) == 0)
#define _ACL_GET_DEFAULT(pv,pd)	(xfs_acl_vtoacl(pv,NULL,pd) == 0)
#define _ACL_ACCESS_EXISTS	xfs_acl_vhasacl_access
#define _ACL_DEFAULT_EXISTS	xfs_acl_vhasacl_default
/* Yields -1 without consulting the ACL when XFS_IFORK_Q(i) is false. */
#define _ACL_XFS_IACCESS(i,m,c)	(XFS_IFORK_Q(i) ? xfs_acl_iaccess(i,m,c) : -1)

#else
/* Without CONFIG_XFS_POSIX_ACL every ACL operation is a constant stub. */
#define xfs_acl_vset(v,p,sz,t)	(-EOPNOTSUPP)
#define xfs_acl_vget(v,p,sz,t)	(-EOPNOTSUPP)
#define xfs_acl_vremove(v,t)	(-EOPNOTSUPP)
#define xfs_acl_vhasacl_access(v)	(0)
#define xfs_acl_vhasacl_default(v)	(0)
#define _ACL_DECL(a)		((void)0)
#define _ACL_ALLOC(a)		(1)	/* successfully allocate nothing */
#define _ACL_FREE(a)		((void)0)
#define _ACL_ZONE_INIT(z,name)	((void)0)
#define _ACL_ZONE_DESTROY(z)	((void)0)
#define _ACL_INHERIT(c,v,d)	(0)
#define _ACL_GET_ACCESS(pv,pa)	(0)
#define _ACL_GET_DEFAULT(pv,pd)	(0)
#define _ACL_ACCESS_EXISTS	(NULL)
#define _ACL_DEFAULT_EXISTS	(NULL)
#define _ACL_XFS_IACCESS(i,m,c)	(-1)
#endif

#endif	/* __KERNEL__ */

#endif	/* __XFS_ACL_H__ */
diff --git a/sys/gnu/fs/xfs/xfs_ag.h b/sys/gnu/fs/xfs/xfs_ag.h new file mode 100644 index 000000000000..151a5cf062fc --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_ag.h @@ -0,0 +1,411 @@
/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_AG_H__ +#define __XFS_AG_H__ + +/* + * Allocation group header + * This is divided into three structures, placed in sequential 512-byte + * buffers after a copy of the superblock (also in a 512-byte buffer). 
+ */ + +struct xfs_buf; +struct xfs_mount; +struct xfs_trans; + +#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ +#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ +#define XFS_AGF_VERSION 1 +#define XFS_AGI_VERSION 1 + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AGF_GOOD_VERSION) +int xfs_agf_good_version(unsigned v); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_GOOD_VERSION) +#define XFS_AGF_GOOD_VERSION(v) xfs_agf_good_version(v) +#else +#define XFS_AGF_GOOD_VERSION(v) ((v) == XFS_AGF_VERSION) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AGI_GOOD_VERSION) +int xfs_agi_good_version(unsigned v); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_GOOD_VERSION) +#define XFS_AGI_GOOD_VERSION(v) xfs_agi_good_version(v) +#else +#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) +#endif + +/* + * Btree number 0 is bno, 1 is cnt. This value gives the size of the + * arrays below. + */ +#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) + +/* + * The second word of agf_levels in the first a.g. overlaps the EFS + * superblock's magic number. Since the magic numbers valid for EFS + * are > 64k, our value cannot be confused for an EFS superblock's. + */ + +typedef struct xfs_agf +{ + /* + * Common allocation group header information + */ + __uint32_t agf_magicnum; /* magic number == XFS_AGF_MAGIC */ + __uint32_t agf_versionnum; /* header version == XFS_AGF_VERSION */ + xfs_agnumber_t agf_seqno; /* sequence # starting from 0 */ + xfs_agblock_t agf_length; /* size in blocks of a.g. 
*/ + /* + * Freespace information + */ + xfs_agblock_t agf_roots[XFS_BTNUM_AGF]; /* root blocks */ + __uint32_t agf_spare0; /* spare field */ + __uint32_t agf_levels[XFS_BTNUM_AGF]; /* btree levels */ + __uint32_t agf_spare1; /* spare field */ + __uint32_t agf_flfirst; /* first freelist block's index */ + __uint32_t agf_fllast; /* last freelist block's index */ + __uint32_t agf_flcount; /* count of blocks in freelist */ + xfs_extlen_t agf_freeblks; /* total free blocks */ + xfs_extlen_t agf_longest; /* longest free space */ +} xfs_agf_t; + +#define XFS_AGF_MAGICNUM 0x00000001 +#define XFS_AGF_VERSIONNUM 0x00000002 +#define XFS_AGF_SEQNO 0x00000004 +#define XFS_AGF_LENGTH 0x00000008 +#define XFS_AGF_ROOTS 0x00000010 +#define XFS_AGF_LEVELS 0x00000020 +#define XFS_AGF_FLFIRST 0x00000040 +#define XFS_AGF_FLLAST 0x00000080 +#define XFS_AGF_FLCOUNT 0x00000100 +#define XFS_AGF_FREEBLKS 0x00000200 +#define XFS_AGF_LONGEST 0x00000400 +#define XFS_AGF_NUM_BITS 11 +#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) + +/* disk block (xfs_daddr_t) in the AG */ +#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AGF_BLOCK) +xfs_agblock_t xfs_agf_block(struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_BLOCK) +#define XFS_AGF_BLOCK(mp) xfs_agf_block(mp) +#else +#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) +#endif + +/* + * Size of the unlinked inode hash table in the agi. + */ +#define XFS_AGI_UNLINKED_BUCKETS 64 + +typedef struct xfs_agi +{ + /* + * Common allocation group header information + */ + __uint32_t agi_magicnum; /* magic number == XFS_AGI_MAGIC */ + __uint32_t agi_versionnum; /* header version == XFS_AGI_VERSION */ + xfs_agnumber_t agi_seqno; /* sequence # starting from 0 */ + xfs_agblock_t agi_length; /* size in blocks of a.g. 
*/ + /* + * Inode information + * Inodes are mapped by interpreting the inode number, so no + * mapping data is needed here. + */ + xfs_agino_t agi_count; /* count of allocated inodes */ + xfs_agblock_t agi_root; /* root of inode btree */ + __uint32_t agi_level; /* levels in inode btree */ + xfs_agino_t agi_freecount; /* number of free inodes */ + xfs_agino_t agi_newino; /* new inode just allocated */ + xfs_agino_t agi_dirino; /* last directory inode chunk */ + /* + * Hash table of inodes which have been unlinked but are + * still being referenced. + */ + xfs_agino_t agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; +} xfs_agi_t; + +#define XFS_AGI_MAGICNUM 0x00000001 +#define XFS_AGI_VERSIONNUM 0x00000002 +#define XFS_AGI_SEQNO 0x00000004 +#define XFS_AGI_LENGTH 0x00000008 +#define XFS_AGI_COUNT 0x00000010 +#define XFS_AGI_ROOT 0x00000020 +#define XFS_AGI_LEVEL 0x00000040 +#define XFS_AGI_FREECOUNT 0x00000080 +#define XFS_AGI_NEWINO 0x00000100 +#define XFS_AGI_DIRINO 0x00000200 +#define XFS_AGI_UNLINKED 0x00000400 +#define XFS_AGI_NUM_BITS 11 +#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) + +/* disk block (xfs_daddr_t) in the AG */ +#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AGI_BLOCK) +xfs_agblock_t xfs_agi_block(struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_BLOCK) +#define XFS_AGI_BLOCK(mp) xfs_agi_block(mp) +#else +#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) +#endif + +/* + * The third a.g. block contains the a.g. freelist, an array + * of block pointers to blocks owned by the allocation btree code. 
+ */ +#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AGFL_BLOCK) +xfs_agblock_t xfs_agfl_block(struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGFL_BLOCK) +#define XFS_AGFL_BLOCK(mp) xfs_agfl_block(mp) +#else +#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) +#endif +#define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) + +typedef struct xfs_agfl { + xfs_agblock_t agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ +} xfs_agfl_t; + +/* + * Busy block/extent entry. Used in perag to mark blocks that have been freed + * but whose transactions aren't committed to disk yet. + */ +typedef struct xfs_perag_busy { + xfs_agblock_t busy_start; + xfs_extlen_t busy_length; + struct xfs_trans *busy_tp; /* transaction that did the free */ +} xfs_perag_busy_t; + +/* + * Per-ag incore structure, copies of information in agf and agi, + * to improve the performance of allocation group selection. 
+ * + * pick sizes which fit in allocation buckets well + */ +#if (BITS_PER_LONG == 32) +#define XFS_PAGB_NUM_SLOTS 84 +#elif (BITS_PER_LONG == 64) +#define XFS_PAGB_NUM_SLOTS 128 +#endif + +typedef struct xfs_perag +{ + char pagf_init; /* this agf's entry is initialized */ + char pagi_init; /* this agi's entry is initialized */ + char pagf_metadata; /* the agf is prefered to be metadata */ + char pagi_inodeok; /* The agi is ok for inodes */ + __uint8_t pagf_levels[XFS_BTNUM_AGF]; + /* # of levels in bno & cnt btree */ + __uint32_t pagf_flcount; /* count of blocks in freelist */ + xfs_extlen_t pagf_freeblks; /* total free blocks */ + xfs_extlen_t pagf_longest; /* longest free space */ + xfs_agino_t pagi_freecount; /* number of free inodes */ +#ifdef __KERNEL__ + lock_t pagb_lock; /* lock for pagb_list */ +#endif + int pagb_count; /* pagb slots in use */ + xfs_perag_busy_t *pagb_list; /* unstable blocks */ +} xfs_perag_t; + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAXLEVELS) +int xfs_ag_maxlevels(struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAXLEVELS) +#define XFS_AG_MAXLEVELS(mp) xfs_ag_maxlevels(mp) +#else +#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST) +int xfs_min_freelist(xfs_agf_t *a, struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST) +#define XFS_MIN_FREELIST(a,mp) xfs_min_freelist(a,mp) +#else +#define XFS_MIN_FREELIST(a,mp) \ + XFS_MIN_FREELIST_RAW( \ + INT_GET((a)->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT), \ + INT_GET((a)->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT), mp) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_PAG) +int xfs_min_freelist_pag(xfs_perag_t *pag, struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_PAG) +#define 
XFS_MIN_FREELIST_PAG(pag,mp) xfs_min_freelist_pag(pag,mp) +#else +#define XFS_MIN_FREELIST_PAG(pag,mp) \ + XFS_MIN_FREELIST_RAW((uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ + (uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_RAW) +int xfs_min_freelist_raw(uint bl, uint cl, struct xfs_mount *mp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_RAW) +#define XFS_MIN_FREELIST_RAW(bl,cl,mp) xfs_min_freelist_raw(bl,cl,mp) +#else +#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ + (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + \ + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_FSB) +xfs_fsblock_t xfs_agb_to_fsb(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_FSB) +#define XFS_AGB_TO_FSB(mp,agno,agbno) xfs_agb_to_fsb(mp,agno,agbno) +#else +#define XFS_AGB_TO_FSB(mp,agno,agbno) \ + (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGNO) +xfs_agnumber_t xfs_fsb_to_agno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGNO) +#define XFS_FSB_TO_AGNO(mp,fsbno) xfs_fsb_to_agno(mp,fsbno) +#else +#define XFS_FSB_TO_AGNO(mp,fsbno) \ + ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog)) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGBNO) +xfs_agblock_t xfs_fsb_to_agbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGBNO) +#define XFS_FSB_TO_AGBNO(mp,fsbno) xfs_fsb_to_agbno(mp,fsbno) +#else +#define XFS_FSB_TO_AGBNO(mp,fsbno) \ + ((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog))) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || 
(XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_DADDR) +xfs_daddr_t xfs_agb_to_daddr(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_DADDR) +#define XFS_AGB_TO_DADDR(mp,agno,agbno) xfs_agb_to_daddr(mp,agno,agbno) +#else +#define XFS_AGB_TO_DADDR(mp,agno,agbno) \ + ((xfs_daddr_t)(XFS_FSB_TO_BB(mp, \ + (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno)))) +#endif +/* + * XFS_DADDR_TO_AGNO and XFS_DADDR_TO_AGBNO moved to xfs_mount.h + * to avoid header file ordering change + */ + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AG_DADDR) +xfs_daddr_t xfs_ag_daddr(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_daddr_t d); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_DADDR) +#define XFS_AG_DADDR(mp,agno,d) xfs_ag_daddr(mp,agno,d) +#else +#define XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d)) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGF) +xfs_agf_t *xfs_buf_to_agf(struct xfs_buf *bp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGF) +#define XFS_BUF_TO_AGF(bp) xfs_buf_to_agf(bp) +#else +#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGI) +xfs_agi_t *xfs_buf_to_agi(struct xfs_buf *bp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGI) +#define XFS_BUF_TO_AGI(bp) xfs_buf_to_agi(bp) +#else +#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGFL) +xfs_agfl_t *xfs_buf_to_agfl(struct xfs_buf *bp); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGFL) +#define XFS_BUF_TO_AGFL(bp) xfs_buf_to_agfl(bp) +#else +#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) +#endif + +/* + * For checking for bad ranges of xfs_daddr_t's, 
covering multiple + * allocation groups or a single xfs_daddr_t that's a superblock copy. + */ +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_AG_CHECK_DADDR) +void xfs_ag_check_daddr(struct xfs_mount *mp, xfs_daddr_t d, xfs_extlen_t len); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_CHECK_DADDR) +#define XFS_AG_CHECK_DADDR(mp,d,len) xfs_ag_check_daddr(mp,d,len) +#else +#define XFS_AG_CHECK_DADDR(mp,d,len) \ + ((len) == 1 ? \ + ASSERT((d) == XFS_SB_DADDR || \ + XFS_DADDR_TO_AGBNO(mp, d) != XFS_SB_DADDR) : \ + ASSERT(XFS_DADDR_TO_AGNO(mp, d) == \ + XFS_DADDR_TO_AGNO(mp, (d) + (len) - 1))) +#endif + +#endif /* __XFS_AG_H__ */ diff --git a/sys/gnu/fs/xfs/xfs_alloc.c b/sys/gnu/fs/xfs/xfs_alloc.c new file mode 100644 index 000000000000..74cad985b003 --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_alloc.c @@ -0,0 +1,2613 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Free space allocation for XFS. + */ +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_alloc.h" +#include "xfs_bit.h" +#include "xfs_error.h" + + +#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) + +#define XFSA_FIXUP_BNO_OK 1 +#define XFSA_FIXUP_CNT_OK 2 + +int +xfs_alloc_search_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len); + +#if defined(XFS_ALLOC_TRACE) +ktrace_t *xfs_alloc_trace_buf; + +#define TRACE_ALLOC(s,a) \ + xfs_alloc_trace_alloc(fname, s, a, __LINE__) +#define TRACE_FREE(s,a,b,x,f) \ + xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__) +#define TRACE_MODAGF(s,a,f) \ + xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__) +#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp) \ + xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) +#define TRACE_UNBUSY(fname,s,ag,sl,tp) \ + xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) +#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) \ + xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) +#else +#define TRACE_ALLOC(s,a) +#define TRACE_FREE(s,a,b,x,f) +#define TRACE_MODAGF(s,a,f) +#define TRACE_BUSY(s,a,ag,agb,l,sl,tp) +#define TRACE_UNBUSY(fname,s,ag,sl,tp) +#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) +#endif /* 
XFS_ALLOC_TRACE */ + +/* + * Prototypes for per-ag allocation routines + */ + +STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, + xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); + +/* + * Internal functions. + */ + +/* + * Compute aligned version of the found extent. + * Takes alignment and min length into account. + */ +STATIC int /* success (>= minlen) */ +xfs_alloc_compute_aligned( + xfs_agblock_t foundbno, /* starting block in found extent */ + xfs_extlen_t foundlen, /* length in found extent */ + xfs_extlen_t alignment, /* alignment for allocation */ + xfs_extlen_t minlen, /* minimum length for allocation */ + xfs_agblock_t *resbno, /* result block number */ + xfs_extlen_t *reslen) /* result length */ +{ + xfs_agblock_t bno; + xfs_extlen_t diff; + xfs_extlen_t len; + + if (alignment > 1 && foundlen >= minlen) { + bno = roundup(foundbno, alignment); + diff = bno - foundbno; + len = diff >= foundlen ? 0 : foundlen - diff; + } else { + bno = foundbno; + len = foundlen; + } + *resbno = bno; + *reslen = len; + return len >= minlen; +} + +/* + * Compute best start block and diff for "near" allocations. + * freelen >= wantlen already checked by caller. 
+ */ +STATIC xfs_extlen_t /* difference value (absolute) */ +xfs_alloc_compute_diff( + xfs_agblock_t wantbno, /* target starting block */ + xfs_extlen_t wantlen, /* target length */ + xfs_extlen_t alignment, /* target alignment */ + xfs_agblock_t freebno, /* freespace's starting block */ + xfs_extlen_t freelen, /* freespace's length */ + xfs_agblock_t *newbnop) /* result: best start block from free */ +{ + xfs_agblock_t freeend; /* end of freespace extent */ + xfs_agblock_t newbno1; /* return block number */ + xfs_agblock_t newbno2; /* other new block number */ + xfs_extlen_t newlen1=0; /* length with newbno1 */ + xfs_extlen_t newlen2=0; /* length with newbno2 */ + xfs_agblock_t wantend; /* end of target extent */ + + ASSERT(freelen >= wantlen); + freeend = freebno + freelen; + wantend = wantbno + wantlen; + if (freebno >= wantbno) { + if ((newbno1 = roundup(freebno, alignment)) >= freeend) + newbno1 = NULLAGBLOCK; + } else if (freeend >= wantend && alignment > 1) { + newbno1 = roundup(wantbno, alignment); + newbno2 = newbno1 - alignment; + if (newbno1 >= freeend) + newbno1 = NULLAGBLOCK; + else + newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); + if (newbno2 < freebno) + newbno2 = NULLAGBLOCK; + else + newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); + if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { + if (newlen1 < newlen2 || + (newlen1 == newlen2 && + XFS_ABSDIFF(newbno1, wantbno) > + XFS_ABSDIFF(newbno2, wantbno))) + newbno1 = newbno2; + } else if (newbno2 != NULLAGBLOCK) + newbno1 = newbno2; + } else if (freeend >= wantend) { + newbno1 = wantbno; + } else if (alignment > 1) { + newbno1 = roundup(freeend - wantlen, alignment); + if (newbno1 > freeend - wantlen && + newbno1 - alignment >= freebno) + newbno1 -= alignment; + else if (newbno1 >= freeend) + newbno1 = NULLAGBLOCK; + } else + newbno1 = freeend - wantlen; + *newbnop = newbno1; + return newbno1 == NULLAGBLOCK ? 
0 : XFS_ABSDIFF(newbno1, wantbno); +} + +/* + * Fix up the length, based on mod and prod. + * len should be k * prod + mod for some k. + * If len is too small it is returned unchanged. + * If len hits maxlen it is left alone. + */ +STATIC void +xfs_alloc_fix_len( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_extlen_t k; + xfs_extlen_t rlen; + + ASSERT(args->mod < args->prod); + rlen = args->len; + ASSERT(rlen >= args->minlen); + ASSERT(rlen <= args->maxlen); + if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || + (args->mod == 0 && rlen < args->prod)) + return; + k = rlen % args->prod; + if (k == args->mod) + return; + if (k > args->mod) { + if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) + return; + } else { + if ((int)(rlen = rlen - args->prod - (args->mod - k)) < + (int)args->minlen) + return; + } + ASSERT(rlen >= args->minlen); + ASSERT(rlen <= args->maxlen); + args->len = rlen; +} + +/* + * Fix up length if there is too little space left in the a.g. + * Return 1 if ok, 0 if too little, should give up. + */ +STATIC int +xfs_alloc_fix_minleft( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_agf_t *agf; /* a.g. freelist header */ + int diff; /* free space difference */ + + if (args->minleft == 0) + return 1; + agf = XFS_BUF_TO_AGF(args->agbp); + diff = INT_GET(agf->agf_freeblks, ARCH_CONVERT) + + INT_GET(agf->agf_flcount, ARCH_CONVERT) + - args->len - args->minleft; + if (diff >= 0) + return 1; + args->len += diff; /* shrink the allocated space */ + if (args->len >= args->minlen) + return 1; + args->agbno = NULLAGBLOCK; + return 0; +} + +/* + * Update the two btrees, logically removing from freespace the extent + * starting at rbno, rlen blocks. The extent is contained within the + * actual (current) free extent fbno for flen blocks. + * Flags are passed in indicating whether the cursors are set to the + * relevant records. 
+ */ +STATIC int /* error code */ +xfs_alloc_fixup_trees( + xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ + xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ + xfs_agblock_t fbno, /* starting block of free extent */ + xfs_extlen_t flen, /* length of free extent */ + xfs_agblock_t rbno, /* starting block of returned extent */ + xfs_extlen_t rlen, /* length of returned extent */ + int flags) /* flags, XFSA_FIXUP_... */ +{ + int error; /* error code */ + int i; /* operation results */ + xfs_agblock_t nfbno1; /* first new free startblock */ + xfs_agblock_t nfbno2; /* second new free startblock */ + xfs_extlen_t nflen1=0; /* first new free length */ + xfs_extlen_t nflen2=0; /* second new free length */ + + /* + * Look up the record in the by-size tree if necessary. + */ + if (flags & XFSA_FIXUP_CNT_OK) { +#ifdef DEBUG + if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN( + i == 1 && nfbno1 == fbno && nflen1 == flen); +#endif + } else { + if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + /* + * Look up the record in the by-block tree if necessary. 
+ */ + if (flags & XFSA_FIXUP_BNO_OK) { +#ifdef DEBUG + if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN( + i == 1 && nfbno1 == fbno && nflen1 == flen); +#endif + } else { + if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } +#ifdef DEBUG + { + xfs_alloc_block_t *bnoblock; + xfs_alloc_block_t *cntblock; + + if (bno_cur->bc_nlevels == 1 && + cnt_cur->bc_nlevels == 1) { + bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); + cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); + XFS_WANT_CORRUPTED_RETURN( + INT_GET(bnoblock->bb_numrecs, ARCH_CONVERT) == INT_GET(cntblock->bb_numrecs, ARCH_CONVERT)); + } + } +#endif + /* + * Deal with all four cases: the allocated record is contained + * within the freespace record, so we can have new freespace + * at either (or both) end, or no freespace remaining. + */ + if (rbno == fbno && rlen == flen) + nfbno1 = nfbno2 = NULLAGBLOCK; + else if (rbno == fbno) { + nfbno1 = rbno + rlen; + nflen1 = flen - rlen; + nfbno2 = NULLAGBLOCK; + } else if (rbno + rlen == fbno + flen) { + nfbno1 = fbno; + nflen1 = flen - rlen; + nfbno2 = NULLAGBLOCK; + } else { + nfbno1 = fbno; + nflen1 = rbno - fbno; + nfbno2 = rbno + rlen; + nflen2 = (fbno + flen) - nfbno2; + } + /* + * Delete the entry from the by-size btree. + */ + if ((error = xfs_alloc_delete(cnt_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + /* + * Add new by-size btree entry(s). 
+ */ + if (nfbno1 != NULLAGBLOCK) { + if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 0); + if ((error = xfs_alloc_insert(cnt_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + if (nfbno2 != NULLAGBLOCK) { + if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 0); + if ((error = xfs_alloc_insert(cnt_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + /* + * Fix up the by-block btree entry(s). + */ + if (nfbno1 == NULLAGBLOCK) { + /* + * No remaining freespace, just delete the by-block tree entry. + */ + if ((error = xfs_alloc_delete(bno_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } else { + /* + * Update the by-block entry to start later|be shorter. + */ + if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) + return error; + } + if (nfbno2 != NULLAGBLOCK) { + /* + * 2 resulting free entries, need to add one. + */ + if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 0); + if ((error = xfs_alloc_insert(bno_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + return 0; +} + +/* + * Read in the allocation group free block array. 
+ */ +STATIC int /* error */ +xfs_alloc_read_agfl( + xfs_mount_t *mp, /* mount point structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_buf_t **bpp) /* buffer for the ag free block array */ +{ + xfs_buf_t *bp; /* return value */ + int error; + + ASSERT(agno != NULLAGNUMBER); + error = xfs_trans_read_buf( + mp, tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &bp); + if (error) + return error; + ASSERT(bp); + ASSERT(!XFS_BUF_GETERROR(bp)); + XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); + *bpp = bp; + return 0; +} + +#if defined(XFS_ALLOC_TRACE) +/* + * Add an allocation trace entry for an alloc call. + */ +STATIC void +xfs_alloc_trace_alloc( + char *name, /* function tag string */ + char *str, /* additional string */ + xfs_alloc_arg_t *args, /* allocation argument structure */ + int line) /* source line number */ +{ + ktrace_enter(xfs_alloc_trace_buf, + (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)), + (void *)name, + (void *)str, + (void *)args->mp, + (void *)(__psunsigned_t)args->agno, + (void *)(__psunsigned_t)args->agbno, + (void *)(__psunsigned_t)args->minlen, + (void *)(__psunsigned_t)args->maxlen, + (void *)(__psunsigned_t)args->mod, + (void *)(__psunsigned_t)args->prod, + (void *)(__psunsigned_t)args->minleft, + (void *)(__psunsigned_t)args->total, + (void *)(__psunsigned_t)args->alignment, + (void *)(__psunsigned_t)args->len, + (void *)((((__psint_t)args->type) << 16) | + (__psint_t)args->otype), + (void *)(__psint_t)((args->wasdel << 3) | + (args->wasfromfl << 2) | + (args->isfl << 1) | + (args->userdata << 0))); +} + +/* + * Add an allocation trace entry for a free call. + */ +STATIC void +xfs_alloc_trace_free( + char *name, /* function tag string */ + char *str, /* additional string */ + xfs_mount_t *mp, /* file system mount point */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* a.g. 
relative block number */ + xfs_extlen_t len, /* length of extent */ + int isfl, /* set if is freelist allocation/free */ + int line) /* source line number */ +{ + ktrace_enter(xfs_alloc_trace_buf, + (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)), + (void *)name, + (void *)str, + (void *)mp, + (void *)(__psunsigned_t)agno, + (void *)(__psunsigned_t)agbno, + (void *)(__psunsigned_t)len, + (void *)(__psint_t)isfl, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +} + +/* + * Add an allocation trace entry for modifying an agf. + */ +STATIC void +xfs_alloc_trace_modagf( + char *name, /* function tag string */ + char *str, /* additional string */ + xfs_mount_t *mp, /* file system mount point */ + xfs_agf_t *agf, /* new agf value */ + int flags, /* logging flags for agf */ + int line) /* source line number */ +{ + ktrace_enter(xfs_alloc_trace_buf, + (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)), + (void *)name, + (void *)str, + (void *)mp, + (void *)(__psint_t)flags, + (void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO], + ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT], + ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO], + ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT], + ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_freeblks, ARCH_CONVERT), + (void *)(__psunsigned_t)INT_GET(agf->agf_longest, ARCH_CONVERT)); +} + +STATIC void +xfs_alloc_trace_busy( + char *name, /* function tag string */ + char *str, /* additional string */ + xfs_mount_t *mp, /* file system mount poing */ + xfs_agnumber_t agno, /* 
allocation group number */ + xfs_agblock_t agbno, /* a.g. relative block number */ + xfs_extlen_t len, /* length of extent */ + int slot, /* perag Busy slot */ + xfs_trans_t *tp, + int trtype, /* type: add, delete, search */ + int line) /* source line number */ +{ + ktrace_enter(xfs_alloc_trace_buf, + (void *)(__psint_t)(trtype | (line << 16)), + (void *)name, + (void *)str, + (void *)mp, + (void *)(__psunsigned_t)agno, + (void *)(__psunsigned_t)agbno, + (void *)(__psunsigned_t)len, + (void *)(__psint_t)slot, + (void *)tp, + NULL, NULL, NULL, NULL, NULL, NULL, NULL); +} +#endif /* XFS_ALLOC_TRACE */ + +/* + * Allocation group level functions. + */ + +/* + * Allocate a variable extent in the allocation group agno. + * Type and bno are used to determine where in the allocation group the + * extent will start. + * Extent's length (returned in *len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent( + xfs_alloc_arg_t *args) /* argument structure for allocation */ +{ + int error=0; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent"; +#endif + + ASSERT(args->minlen > 0); + ASSERT(args->maxlen > 0); + ASSERT(args->minlen <= args->maxlen); + ASSERT(args->mod < args->prod); + ASSERT(args->alignment > 0); + /* + * Branch to correct routine based on the type. + */ + args->wasfromfl = 0; + switch (args->type) { + case XFS_ALLOCTYPE_THIS_AG: + error = xfs_alloc_ag_vextent_size(args); + break; + case XFS_ALLOCTYPE_NEAR_BNO: + error = xfs_alloc_ag_vextent_near(args); + break; + case XFS_ALLOCTYPE_THIS_BNO: + error = xfs_alloc_ag_vextent_exact(args); + break; + default: + ASSERT(0); + /* NOTREACHED */ + } + if (error) + return error; + /* + * If the allocation worked, need to change the agf structure + * (and log it), and the superblock. 
+ */ + if (args->agbno != NULLAGBLOCK) { + xfs_agf_t *agf; /* allocation group freelist header */ +#ifdef XFS_ALLOC_TRACE + xfs_mount_t *mp = args->mp; +#endif + long slen = (long)args->len; + + ASSERT(args->len >= args->minlen && args->len <= args->maxlen); + ASSERT(!(args->wasfromfl) || !args->isfl); + ASSERT(args->agbno % args->alignment == 0); + if (!(args->wasfromfl)) { + + agf = XFS_BUF_TO_AGF(args->agbp); + INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -(args->len)); + xfs_trans_agblocks_delta(args->tp, + -((long)(args->len))); + args->pag->pagf_freeblks -= args->len; + ASSERT(INT_GET(agf->agf_freeblks, ARCH_CONVERT) + <= INT_GET(agf->agf_length, ARCH_CONVERT)); + TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); + xfs_alloc_log_agf(args->tp, args->agbp, + XFS_AGF_FREEBLKS); + /* search the busylist for these blocks */ + xfs_alloc_search_busy(args->tp, args->agno, + args->agbno, args->len); + } + if (!args->isfl) + xfs_trans_mod_sb(args->tp, + args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : + XFS_TRANS_SB_FDBLOCKS, -slen); + XFS_STATS_INC(xs_allocx); + XFS_STATS_ADD(xs_allocb, args->len); + } + return 0; +} + +/* + * Allocate a variable extent at exactly agno/bno. + * Extent's length (returned in *len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 
+ */ +STATIC int /* error */ +xfs_alloc_ag_vextent_exact( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ + xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ + xfs_agblock_t end; /* end of allocated extent */ + int error; + xfs_agblock_t fbno; /* start block of found extent */ + xfs_agblock_t fend; /* end block of found extent */ + xfs_extlen_t flen; /* length of found extent */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent_exact"; +#endif + int i; /* success/failure of operation */ + xfs_agblock_t maxend; /* end of maximal extent */ + xfs_agblock_t minend; /* end of minimal extent */ + xfs_extlen_t rlen; /* length of returned extent */ + + ASSERT(args->alignment == 1); + /* + * Allocate/initialize a cursor for the by-number freespace btree. + */ + bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO, 0, 0); + /* + * Lookup bno and minlen in the btree (minlen is irrelevant, really). + * Look for the closest free block <= bno, it must contain bno + * if any free block does. + */ + if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) + goto error0; + if (!i) { + /* + * Didn't find it, return null. + */ + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + args->agbno = NULLAGBLOCK; + return 0; + } + /* + * Grab the freespace record. + */ + if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + ASSERT(fbno <= args->agbno); + minend = args->agbno + args->minlen; + maxend = args->agbno + args->maxlen; + fend = fbno + flen; + /* + * Give up if the freespace isn't long enough for the minimum request. + */ + if (fend < minend) { + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + args->agbno = NULLAGBLOCK; + return 0; + } + /* + * End of extent will be smaller of the freespace end and the + * maximal requested end. 
+ */ + end = XFS_AGBLOCK_MIN(fend, maxend); + /* + * Fix the length according to mod and prod if given. + */ + args->len = end - args->agbno; + xfs_alloc_fix_len(args); + if (!xfs_alloc_fix_minleft(args)) { + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + return 0; + } + rlen = args->len; + ASSERT(args->agbno + rlen <= fend); + end = args->agbno + rlen; + /* + * We are allocating agbno for rlen [agbno .. end] + * Allocate/initialize a cursor for the by-size btree. + */ + cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT, 0, 0); + ASSERT(args->agbno + args->len <= + INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT)); + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, + args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + goto error0; + } + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + TRACE_ALLOC("normal", args); + args->wasfromfl = 0; + return 0; + +error0: + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + TRACE_ALLOC("error", args); + return error; +} + +/* + * Allocate a variable extent near bno in the allocation group agno. + * Extent's length (returned in len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_near( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ + xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ + xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent_near"; +#endif + xfs_agblock_t gtbno; /* start bno of right side entry */ + xfs_agblock_t gtbnoa; /* aligned ... 
*/ + xfs_extlen_t gtdiff; /* difference to right side entry */ + xfs_extlen_t gtlen; /* length of right side entry */ + xfs_extlen_t gtlena; /* aligned ... */ + xfs_agblock_t gtnew; /* useful start bno of right side */ + int error; /* error code */ + int i; /* result code, temporary */ + int j; /* result code, temporary */ + xfs_agblock_t ltbno; /* start bno of left side entry */ + xfs_agblock_t ltbnoa; /* aligned ... */ + xfs_extlen_t ltdiff; /* difference to left side entry */ + /*REFERENCED*/ + xfs_agblock_t ltend; /* end bno of left side entry */ + xfs_extlen_t ltlen; /* length of left side entry */ + xfs_extlen_t ltlena; /* aligned ... */ + xfs_agblock_t ltnew; /* useful start bno of left side */ + xfs_extlen_t rlen; /* length of returned extent */ +#if defined(DEBUG) && defined(__KERNEL__) + /* + * Randomly don't execute the first algorithm. + */ + int dofirst; /* set to do first algorithm */ + + dofirst = random() & 1; +#endif + /* + * Get a cursor for the by-size btree. + */ + cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT, 0, 0); + ltlen = 0; + bno_cur_lt = bno_cur_gt = NULL; + /* + * See if there are any free extents as big as maxlen. + */ + if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) + goto error0; + /* + * If none, then pick up the last entry in the tree unless the + * tree is empty. + */ + if (!i) { + if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, + <len, &i))) + goto error0; + if (i == 0 || ltlen == 0) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + return 0; + } + ASSERT(i == 1); + } + args->wasfromfl = 0; + /* + * First algorithm. + * If the requested extent is large wrt the freespaces available + * in this a.g., then the cursor will be pointing to a btree entry + * near the right edge of the tree. If it's in the last btree leaf + * block, then we just examine all the entries in that block + * that are big enough, and pick the best one. 
+ * This is written as a while loop so we can break out of it, + * but we never loop back to the top. + */ + while (xfs_btree_islastblock(cnt_cur, 0)) { + xfs_extlen_t bdiff; + int besti=0; + xfs_extlen_t blen=0; + xfs_agblock_t bnew=0; + +#if defined(DEBUG) && defined(__KERNEL__) + if (!dofirst) + break; +#endif + /* + * Start from the entry that lookup found, sequence through + * all larger free blocks. If we're actually pointing at a + * record smaller than maxlen, go to the start of this block, + * and skip all those smaller than minlen. + */ + if (ltlen || args->alignment > 1) { + cnt_cur->bc_ptrs[0] = 1; + do { + if ((error = xfs_alloc_get_rec(cnt_cur, <bno, + <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (ltlen >= args->minlen) + break; + if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) + goto error0; + } while (i); + ASSERT(ltlen >= args->minlen); + if (!i) + break; + } + i = cnt_cur->bc_ptrs[0]; + for (j = 1, blen = 0, bdiff = 0; + !error && j && (blen < args->maxlen || bdiff > 0); + error = xfs_alloc_increment(cnt_cur, 0, &j)) { + /* + * For each entry, decide if it's better than + * the previous best entry. + */ + if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (!xfs_alloc_compute_aligned(ltbno, ltlen, + args->alignment, args->minlen, + <bnoa, <lena)) + continue; + args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); + xfs_alloc_fix_len(args); + ASSERT(args->len >= args->minlen); + if (args->len < blen) + continue; + ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, ltbno, ltlen, <new); + if (ltnew != NULLAGBLOCK && + (args->len > blen || ltdiff < bdiff)) { + bdiff = ltdiff; + bnew = ltnew; + blen = args->len; + besti = cnt_cur->bc_ptrs[0]; + } + } + /* + * It didn't work. We COULD be in a case where + * there's a good record somewhere, so try again. 
+ */ + if (blen == 0) + break; + /* + * Point at the best entry, and retrieve it again. + */ + cnt_cur->bc_ptrs[0] = besti; + if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + ltend = ltbno + ltlen; + ASSERT(ltend <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT)); + args->len = blen; + if (!xfs_alloc_fix_minleft(args)) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + TRACE_ALLOC("nominleft", args); + return 0; + } + blen = args->len; + /* + * We are allocating starting at bnew for blen blocks. + */ + args->agbno = bnew; + ASSERT(bnew >= ltbno); + ASSERT(bnew + blen <= ltend); + /* + * Set up a cursor for the by-bno tree. + */ + bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO, 0, 0); + /* + * Fix up the btree entries. + */ + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, + ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) + goto error0; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + TRACE_ALLOC("first", args); + return 0; + } + /* + * Second algorithm. + * Search in the by-bno tree to the left and to the right + * simultaneously, until in each case we find a space big enough, + * or run into the edge of the tree. When we run into the edge, + * we deallocate that cursor. + * If both searches succeed, we compare the two spaces and pick + * the better one. + * With alignment, it's possible for both to fail; the upper + * level algorithm that picks allocation groups for allocations + * is not supposed to do this. + */ + /* + * Allocate and initialize the cursor for the leftward search. + */ + bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO, 0, 0); + /* + * Lookup <= bno to find the leftward search's starting point. 
+ */ + if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) + goto error0; + if (!i) { + /* + * Didn't find anything; use this cursor for the rightward + * search. + */ + bno_cur_gt = bno_cur_lt; + bno_cur_lt = 0; + } + /* + * Found something. Duplicate the cursor for the rightward search. + */ + else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) + goto error0; + /* + * Increment the cursor, so we will point at the entry just right + * of the leftward entry if any, or to the leftmost entry. + */ + if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + goto error0; + if (!i) { + /* + * It failed, there are no rightward entries. + */ + xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + /* + * Loop going left with the leftward cursor, right with the + * rightward cursor, until either both directions give up or + * we find an entry at least as big as minlen. + */ + do { + if (bno_cur_lt) { + if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (xfs_alloc_compute_aligned(ltbno, ltlen, + args->alignment, args->minlen, + <bnoa, <lena)) + break; + if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor(bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + } + if (bno_cur_gt) { + if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (xfs_alloc_compute_aligned(gtbno, gtlen, + args->alignment, args->minlen, + >bnoa, >lena)) + break; + if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor(bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + } + } while (bno_cur_lt || bno_cur_gt); + /* + * Got both cursors still active, need to find better entry. + */ + if (bno_cur_lt && bno_cur_gt) { + /* + * Left side is long enough, look for a right side entry. 
+ */ + if (ltlena >= args->minlen) { + /* + * Fix up the length. + */ + args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); + xfs_alloc_fix_len(args); + rlen = args->len; + ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, + args->alignment, ltbno, ltlen, <new); + /* + * Not perfect. + */ + if (ltdiff) { + /* + * Look until we find a better one, run out of + * space, or run off the end. + */ + while (bno_cur_lt && bno_cur_gt) { + if ((error = xfs_alloc_get_rec( + bno_cur_gt, >bno, + >len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(gtbno, gtlen, + args->alignment, args->minlen, + >bnoa, >lena); + /* + * The left one is clearly better. + */ + if (gtbnoa >= args->agbno + ltdiff) { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + break; + } + /* + * If we reach a big enough entry, + * compare the two and pick the best. + */ + if (gtlena >= args->minlen) { + args->len = + XFS_EXTLEN_MIN(gtlena, + args->maxlen); + xfs_alloc_fix_len(args); + rlen = args->len; + gtdiff = xfs_alloc_compute_diff( + args->agbno, rlen, + args->alignment, + gtbno, gtlen, >new); + /* + * Right side is better. + */ + if (gtdiff < ltdiff) { + xfs_btree_del_cursor( + bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + /* + * Left side is better. + */ + else { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + break; + } + /* + * Fell off the right end. + */ + if ((error = xfs_alloc_increment( + bno_cur_gt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + break; + } + } + } + /* + * The left side is perfect, trash the right side. + */ + else { + xfs_btree_del_cursor(bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + } + /* + * It's the right side that was found first, look left. + */ + else { + /* + * Fix up the length. 
+ */ + args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); + xfs_alloc_fix_len(args); + rlen = args->len; + gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, + args->alignment, gtbno, gtlen, >new); + /* + * Right side entry isn't perfect. + */ + if (gtdiff) { + /* + * Look until we find a better one, run out of + * space, or run off the end. + */ + while (bno_cur_lt && bno_cur_gt) { + if ((error = xfs_alloc_get_rec( + bno_cur_lt, <bno, + <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(ltbno, ltlen, + args->alignment, args->minlen, + <bnoa, <lena); + /* + * The right one is clearly better. + */ + if (ltbnoa <= args->agbno - gtdiff) { + xfs_btree_del_cursor( + bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + break; + } + /* + * If we reach a big enough entry, + * compare the two and pick the best. + */ + if (ltlena >= args->minlen) { + args->len = XFS_EXTLEN_MIN( + ltlena, args->maxlen); + xfs_alloc_fix_len(args); + rlen = args->len; + ltdiff = xfs_alloc_compute_diff( + args->agbno, rlen, + args->alignment, + ltbno, ltlen, <new); + /* + * Left side is better. + */ + if (ltdiff < gtdiff) { + xfs_btree_del_cursor( + bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + /* + * Right side is better. + */ + else { + xfs_btree_del_cursor( + bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + break; + } + /* + * Fell off the left end. + */ + if ((error = xfs_alloc_decrement( + bno_cur_lt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor(bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + break; + } + } + } + /* + * The right side is perfect, trash the left side. + */ + else { + xfs_btree_del_cursor(bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + } + } + /* + * If we couldn't get anything, give up. 
+ */ + if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + TRACE_ALLOC("neither", args); + args->agbno = NULLAGBLOCK; + return 0; + } + /* + * At this point we have selected a freespace entry, either to the + * left or to the right. If it's on the right, copy all the + * useful variables to the "left" set so we only have one + * copy of this code. + */ + if (bno_cur_gt) { + bno_cur_lt = bno_cur_gt; + bno_cur_gt = NULL; + ltbno = gtbno; + ltbnoa = gtbnoa; + ltlen = gtlen; + ltlena = gtlena; + j = 1; + } else + j = 0; + /* + * Fix up the length and compute the useful address. + */ + ltend = ltbno + ltlen; + args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); + xfs_alloc_fix_len(args); + if (!xfs_alloc_fix_minleft(args)) { + TRACE_ALLOC("nominleft", args); + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + return 0; + } + rlen = args->len; + (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, + ltlen, <new); + ASSERT(ltnew >= ltbno); + ASSERT(ltnew + rlen <= ltend); + ASSERT(ltnew + rlen <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT)); + args->agbno = ltnew; + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, + ltnew, rlen, XFSA_FIXUP_BNO_OK))) + goto error0; + TRACE_ALLOC(j ? "gt" : "lt", args); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + return 0; + + error0: + TRACE_ALLOC("error", args); + if (cnt_cur != NULL) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + if (bno_cur_lt != NULL) + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); + if (bno_cur_gt != NULL) + xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); + return error; +} + +/* + * Allocate a variable extent anywhere in the allocation group agno. + * Extent's length (returned in len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. 
block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_size( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ + xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ + int error; /* error result */ + xfs_agblock_t fbno; /* start of found freespace */ + xfs_extlen_t flen; /* length of found freespace */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent_size"; +#endif + int i; /* temp status variable */ + xfs_agblock_t rbno; /* returned block number */ + xfs_extlen_t rlen; /* length of returned extent */ + + /* + * Allocate and initialize a cursor for the by-size btree. + */ + cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT, 0, 0); + bno_cur = NULL; + /* + * Look for an entry >= maxlen+alignment-1 blocks. + */ + if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, + args->maxlen + args->alignment - 1, &i))) + goto error0; + /* + * If none, then pick up the last entry in the tree unless the + * tree is empty. + */ + if (!i) { + if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, + &flen, &i))) + goto error0; + if (i == 0 || flen == 0) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + TRACE_ALLOC("noentry", args); + return 0; + } + ASSERT(i == 1); + } + /* + * There's a freespace as big as maxlen+alignment-1, get it. + */ + else { + if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + /* + * In the first case above, we got the last entry in the + * by-size btree. Now we check to see if the space hits maxlen + * once aligned; if not, we search left for something better. + * This can't happen in the second case above. 
+ */ + xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, + &rbno, &rlen); + rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); + XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + (rlen <= flen && rbno + rlen <= fbno + flen), error0); + if (rlen < args->maxlen) { + xfs_agblock_t bestfbno; + xfs_extlen_t bestflen; + xfs_agblock_t bestrbno; + xfs_extlen_t bestrlen; + + bestrlen = rlen; + bestrbno = rbno; + bestflen = flen; + bestfbno = fbno; + for (;;) { + if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) + goto error0; + if (i == 0) + break; + if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (flen < bestrlen) + break; + xfs_alloc_compute_aligned(fbno, flen, args->alignment, + args->minlen, &rbno, &rlen); + rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); + XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + (rlen <= flen && rbno + rlen <= fbno + flen), + error0); + if (rlen > bestrlen) { + bestrlen = rlen; + bestrbno = rbno; + bestflen = flen; + bestfbno = fbno; + if (rlen == args->maxlen) + break; + } + } + if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + rlen = bestrlen; + rbno = bestrbno; + flen = bestflen; + fbno = bestfbno; + } + args->wasfromfl = 0; + /* + * Fix up the length. + */ + args->len = rlen; + xfs_alloc_fix_len(args); + if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + TRACE_ALLOC("nominleft", args); + args->agbno = NULLAGBLOCK; + return 0; + } + rlen = args->len; + XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); + /* + * Allocate and initialize a cursor for the by-block tree. 
+ */ + bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO, 0, 0); + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, + rbno, rlen, XFSA_FIXUP_CNT_OK))) + goto error0; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + cnt_cur = bno_cur = NULL; + args->len = rlen; + args->agbno = rbno; + XFS_WANT_CORRUPTED_GOTO( + args->agbno + args->len <= + INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT), + error0); + TRACE_ALLOC("normal", args); + return 0; + +error0: + TRACE_ALLOC("error", args); + if (cnt_cur) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + if (bno_cur) + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Deal with the case where only small freespaces remain. + * Either return the contents of the last freespace record, + * or allocate space from the freelist if there is nothing in the tree. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_small( + xfs_alloc_arg_t *args, /* allocation argument structure */ + xfs_btree_cur_t *ccur, /* by-size cursor */ + xfs_agblock_t *fbnop, /* result block number */ + xfs_extlen_t *flenp, /* result length */ + int *stat) /* status: 0-freelist, 1-normal/none */ +{ + int error; + xfs_agblock_t fbno; + xfs_extlen_t flen; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent_small"; +#endif + int i; + + if ((error = xfs_alloc_decrement(ccur, 0, &i))) + goto error0; + if (i) { + if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + /* + * Nothing in the btree, try the freelist. Make sure + * to respect minleft even when pulling from the + * freelist. 
+ */ + else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && + (INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_flcount, + ARCH_CONVERT) > args->minleft)) { + if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))) + goto error0; + if (fbno != NULLAGBLOCK) { + if (args->userdata) { + xfs_buf_t *bp; + + bp = xfs_btree_get_bufs(args->mp, args->tp, + args->agno, fbno, 0); + xfs_trans_binval(args->tp, bp); + } + args->len = 1; + args->agbno = fbno; + XFS_WANT_CORRUPTED_GOTO( + args->agbno + args->len <= + INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT), + error0); + args->wasfromfl = 1; + TRACE_ALLOC("freelist", args); + *stat = 0; + return 0; + } + /* + * Nothing in the freelist. + */ + else + flen = 0; + } + /* + * Can't allocate from the freelist for some reason. + */ + else + flen = 0; + /* + * Can't do the allocation, give up. + */ + if (flen < args->minlen) { + args->agbno = NULLAGBLOCK; + TRACE_ALLOC("notenough", args); + flen = 0; + } + *fbnop = fbno; + *flenp = flen; + *stat = 1; + TRACE_ALLOC("normal", args); + return 0; + +error0: + TRACE_ALLOC("error", args); + return error; +} + +/* + * Free the extent starting at agno/bno for length. + */ +STATIC int /* error */ +xfs_free_ag_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer for a.g. 
freelist header */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t bno, /* starting block number */ + xfs_extlen_t len, /* length of extent */ + int isfl) /* set if is freelist blocks - no sb acctg */ +{ + xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ + xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ + int error; /* error return value */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_free_ag_extent"; +#endif + xfs_agblock_t gtbno; /* start of right neighbor block */ + xfs_extlen_t gtlen; /* length of right neighbor block */ + int haveleft; /* have a left neighbor block */ + int haveright; /* have a right neighbor block */ + int i; /* temp, result code */ + xfs_agblock_t ltbno; /* start of left neighbor block */ + xfs_extlen_t ltlen; /* length of left neighbor block */ + xfs_mount_t *mp; /* mount point struct for filesystem */ + xfs_agblock_t nbno; /* new starting block of freespace */ + xfs_extlen_t nlen; /* new length of freespace */ + + mp = tp->t_mountp; + /* + * Allocate and initialize a cursor for the by-block btree. + */ + bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, 0, + 0); + cnt_cur = NULL; + /* + * Look for a neighboring block on the left (lower block numbers) + * that is contiguous with this space. + */ + if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) + goto error0; + if (haveleft) { + /* + * There is a block to our left. + */ + if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * It's not contiguous, though. + */ + if (ltbno + ltlen < bno) + haveleft = 0; + else { + /* + * If this failure happens the request to free this + * space was invalid, it's (partly) already free. + * Very bad. + */ + XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); + } + } + /* + * Look for a neighboring block on the right (higher block numbers) + * that is contiguous with this space. 
+ */ + if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) + goto error0; + if (haveright) { + /* + * There is a block to our right. + */ + if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * It's not contiguous, though. + */ + if (bno + len < gtbno) + haveright = 0; + else { + /* + * If this failure happens the request to free this + * space was invalid, it's (partly) already free. + * Very bad. + */ + XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); + } + } + /* + * Now allocate and initialize a cursor for the by-size tree. + */ + cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, 0, + 0); + /* + * Have both left and right contiguous neighbors. + * Merge all three into a single free block. + */ + if (haveleft && haveright) { + /* + * Delete the old by-size entry on the left. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Delete the old by-size entry on the right. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Delete the old by-block entry for the right block. + */ + if ((error = xfs_alloc_delete(bno_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Move the by-block cursor back to the left neighbor. + */ + if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); +#ifdef DEBUG + /* + * Check that this is the right record: delete didn't + * mangle the cursor. 
+ */ + { + xfs_agblock_t xxbno; + xfs_extlen_t xxlen; + + if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO( + i == 1 && xxbno == ltbno && xxlen == ltlen, + error0); + } +#endif + /* + * Update remaining by-block entry to the new, joined block. + */ + nbno = ltbno; + nlen = len + ltlen + gtlen; + if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) + goto error0; + } + /* + * Have only a left contiguous neighbor. + * Merge it together with the new freespace. + */ + else if (haveleft) { + /* + * Delete the old by-size entry on the left. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Back up the by-block cursor to the left neighbor, and + * update its length. + */ + if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + nbno = ltbno; + nlen = len + ltlen; + if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) + goto error0; + } + /* + * Have only a right contiguous neighbor. + * Merge it together with the new freespace. + */ + else if (haveright) { + /* + * Delete the old by-size entry on the right. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Update the starting block and length of the right + * neighbor in the by-block tree. + */ + nbno = bno; + nlen = len + gtlen; + if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) + goto error0; + } + /* + * No contiguous neighbors. + * Insert the new freespace into the by-block tree. 
+ */ + else { + nbno = bno; + nlen = len; + if ((error = xfs_alloc_insert(bno_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + bno_cur = NULL; + /* + * In all cases we need to insert the new freespace in the by-size tree. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 0, error0); + if ((error = xfs_alloc_insert(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + cnt_cur = NULL; + /* + * Update the freespace totals in the ag and superblock. + */ + { + xfs_agf_t *agf; + xfs_perag_t *pag; /* per allocation group data */ + + agf = XFS_BUF_TO_AGF(agbp); + pag = &mp->m_perag[agno]; + INT_MOD(agf->agf_freeblks, ARCH_CONVERT, len); + xfs_trans_agblocks_delta(tp, len); + pag->pagf_freeblks += len; + XFS_WANT_CORRUPTED_GOTO( + INT_GET(agf->agf_freeblks, ARCH_CONVERT) + <= INT_GET(agf->agf_length, ARCH_CONVERT), + error0); + TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + if (!isfl) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); + XFS_STATS_INC(xs_freex); + XFS_STATS_ADD(xs_freeb, len); + } + TRACE_FREE(haveleft ? + (haveright ? "both" : "left") : + (haveright ? "right" : "none"), + agno, bno, len, isfl); + + /* + * Since blocks move to the free list without the coordination + * used in xfs_bmap_finish, we can't allow block to be available + * for reallocation and non-transaction writing (user data) + * until we know that the transaction that moved it to the free + * list is permanently on disk. We track the blocks by declaring + * these blocks as "busy"; the busy list is maintained on a per-ag + * basis and each transaction records which entries should be removed + * when the iclog commits to disk. If a busy block is allocated, + * the iclog is pushed up to the LSN that freed the block. 
+ */ + xfs_alloc_mark_busy(tp, agno, bno, len); + return 0; + + error0: + TRACE_FREE("error", agno, bno, len, isfl); + if (bno_cur) + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + if (cnt_cur) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Visible (exported) allocation/free functions. + * Some of these are used just by xfs_alloc_btree.c and this file. + */ + +/* + * Compute and fill in value of m_ag_maxlevels. + * The number of leaf entries is bounded by (sb_agblocks + 1) / 2 + * (presumably the worst case of alternating free and used blocks). + * Dividing by the minimum records per leaf block (m_alloc_mnr[0]) gives + * the leaf block count; each further level divides by the minimum + * records per node block (m_alloc_mnr[1]) until a single block remains. + */ +void +xfs_alloc_compute_maxlevels( + xfs_mount_t *mp) /* file system mount structure */ +{ + int level; + uint maxblocks; + uint maxleafents; + int minleafrecs; + int minnoderecs; + + maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; + minleafrecs = mp->m_alloc_mnr[0]; + minnoderecs = mp->m_alloc_mnr[1]; + maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + for (level = 1; maxblocks > 1; level++) + maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + mp->m_ag_maxlevels = level; +} + +/* + * Decide whether to use this allocation group for this allocation. + * If so, fix up the btree freelist's size. + * When 0 is returned, args->agbp is the AGF buffer if the a.g. was + * accepted and NULL if it was rejected. + */ +STATIC int /* error */ +xfs_alloc_fix_freelist( + xfs_alloc_arg_t *args, /* allocation argument structure */ + int flags) /* XFS_ALLOC_FLAG_... */ +{ + xfs_buf_t *agbp; /* agf buffer pointer */ + xfs_agf_t *agf; /* a.g. 
freespace structure pointer */ + xfs_buf_t *agflbp;/* agfl buffer pointer */ + xfs_agblock_t bno; /* freelist block */ + xfs_extlen_t delta; /* new blocks needed in freelist */ + int error; /* error result code */ + xfs_extlen_t longest;/* longest extent in allocation group */ + xfs_mount_t *mp; /* file system mount point structure */ + xfs_extlen_t need; /* total blocks needed in freelist */ + xfs_perag_t *pag; /* per-ag information structure */ + xfs_alloc_arg_t targs; /* local allocation arguments */ + xfs_trans_t *tp; /* transaction pointer */ + + mp = args->mp; + + pag = args->pag; + tp = args->tp; + if (!pag->pagf_init) { + if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, + &agbp))) + return error; + if (!pag->pagf_init) { + args->agbp = NULL; + return 0; + } + } else + agbp = NULL; + + /* + * If this is a metadata preferred pag and we are user data + * then try somewhere else if we are not being asked to + * try harder at this point (i.e. flags is still non-zero). + * NOTE(review): if agbp was read just above it is not released + * on this return, unlike the rejection path below -- confirm + * that the caller's transaction handling covers it. + */ + if (pag->pagf_metadata && args->userdata && flags) { + args->agbp = NULL; + return 0; + } + + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; + /* + * If it looks like there isn't a long enough extent, or enough + * total blocks, reject it. This first check uses the in-core + * per-ag counters; it is repeated against the AGF itself once + * the buffer is held. + */ + longest = (pag->pagf_longest > delta) ? + (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || pag->pagf_longest > 0); + if (args->minlen + args->alignment + args->minalignslop - 1 > longest || + (args->minleft && + (int)(pag->pagf_freeblks + pag->pagf_flcount - + need - args->total) < + (int)args->minleft)) { + if (agbp) + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } + /* + * Get the a.g. freespace buffer. + * Can fail if we're not blocking on locks, and it's held. 
+ */ + if (agbp == NULL) { + if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, + &agbp))) + return error; + if (agbp == NULL) { + args->agbp = NULL; + return 0; + } + } + /* + * Figure out how many blocks we should have in the freelist. + */ + agf = XFS_BUF_TO_AGF(agbp); + need = XFS_MIN_FREELIST(agf, mp); + delta = need > INT_GET(agf->agf_flcount, ARCH_CONVERT) ? + (need - INT_GET(agf->agf_flcount, ARCH_CONVERT)) : 0; + /* + * If there isn't enough total or single-extent, reject it. + */ + longest = INT_GET(agf->agf_longest, ARCH_CONVERT); + longest = (longest > delta) ? (longest - delta) : + (INT_GET(agf->agf_flcount, ARCH_CONVERT) > 0 || longest > 0); + if (args->minlen + args->alignment + args->minalignslop - 1 > longest || + (args->minleft && + (int)(INT_GET(agf->agf_freeblks, ARCH_CONVERT) + + INT_GET(agf->agf_flcount, ARCH_CONVERT) - need - args->total) < + (int)args->minleft)) { + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } + /* + * Make the freelist shorter if it's too long. + */ + while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) { + xfs_buf_t *bp; + + if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) + return error; + if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) + return error; + bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + xfs_trans_binval(tp, bp); + } + /* + * Initialize the args structure. + */ + targs.tp = tp; + targs.mp = mp; + targs.agbp = agbp; + targs.agno = args->agno; + targs.mod = targs.minleft = targs.wasdel = targs.userdata = + targs.minalignslop = 0; + targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; + targs.type = XFS_ALLOCTYPE_THIS_AG; + targs.pag = pag; + if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) + return error; + /* + * Make the freelist longer if it's too short. 
+ */ + while (INT_GET(agf->agf_flcount, ARCH_CONVERT) < need) { + targs.agbno = 0; + targs.maxlen = need - INT_GET(agf->agf_flcount, ARCH_CONVERT); + /* + * Allocate as many blocks as possible at once. + */ + if ((error = xfs_alloc_ag_vextent(&targs))) + return error; + /* + * Stop if we run out. Won't happen if callers are obeying + * the restrictions correctly. Can happen for free calls + * on a completely full ag. + */ + if (targs.agbno == NULLAGBLOCK) + break; + /* + * Put each allocated block on the list. + */ + for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { + if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp, + bno))) + return error; + } + } + args->agbp = agbp; + return 0; +} + +/* + * Get a block from the freelist. + * The block number is returned in *bnop, or NULLAGBLOCK if the freelist + * is empty. The agfl buffer read here is released again before return; + * no buffer for the block itself is handed back. + */ +int /* error */ +xfs_alloc_get_freelist( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer containing the agf structure */ + xfs_agblock_t *bnop) /* block address retrieved from freelist */ +{ + xfs_agf_t *agf; /* a.g. freespace structure */ + xfs_agfl_t *agfl; /* a.g. freelist structure */ + xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + xfs_agblock_t bno; /* block number returned */ + int error; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_get_freelist"; +#endif + xfs_mount_t *mp; /* mount structure */ + xfs_perag_t *pag; /* per allocation group data */ + + agf = XFS_BUF_TO_AGF(agbp); + /* + * Freelist is empty, give up. + */ + if (INT_ISZERO(agf->agf_flcount, ARCH_CONVERT)) { + *bnop = NULLAGBLOCK; + return 0; + } + /* + * Read the array of free blocks. + */ + mp = tp->t_mountp; + if ((error = xfs_alloc_read_agfl(mp, tp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp))) + return error; + agfl = XFS_BUF_TO_AGFL(agflbp); + /* + * Get the block number and update the data structures. 
+ */ + bno = INT_GET(agfl->agfl_bno[INT_GET(agf->agf_flfirst, ARCH_CONVERT)], ARCH_CONVERT); + INT_MOD(agf->agf_flfirst, ARCH_CONVERT, 1); + xfs_trans_brelse(tp, agflbp); + if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) == XFS_AGFL_SIZE(mp)) + INT_ZERO(agf->agf_flfirst, ARCH_CONVERT); + pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)]; + INT_MOD(agf->agf_flcount, ARCH_CONVERT, -1); + xfs_trans_agflist_delta(tp, -1); + pag->pagf_flcount--; + TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); + *bnop = bno; + + /* + * As blocks are freed, they are added to the per-ag busy list + * and remain there until the freeing transaction is committed to + * disk. Now that we have allocated blocks, this list must be + * searched to see if a block is being reused. If one is, then + * the freeing transaction must be pushed to disk NOW by forcing + * to disk all iclogs up to that transaction's LSN. + */ + xfs_alloc_search_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1); + return 0; +} + +/* + * Log the given fields from the agf structure. + */ +void +xfs_alloc_log_agf( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* buffer for a.g. freelist header */ + int fields) /* mask of fields to be logged (XFS_AGF_...) 
*/ +{ + int first; /* first byte offset */ + int last; /* last byte offset */ + static const short offsets[] = { + offsetof(xfs_agf_t, agf_magicnum), + offsetof(xfs_agf_t, agf_versionnum), + offsetof(xfs_agf_t, agf_seqno), + offsetof(xfs_agf_t, agf_length), + offsetof(xfs_agf_t, agf_roots[0]), + offsetof(xfs_agf_t, agf_levels[0]), + offsetof(xfs_agf_t, agf_flfirst), + offsetof(xfs_agf_t, agf_fllast), + offsetof(xfs_agf_t, agf_flcount), + offsetof(xfs_agf_t, agf_freeblks), + offsetof(xfs_agf_t, agf_longest), + sizeof(xfs_agf_t) + }; + + xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); + xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); +} + +/* + * Interface for inode allocation to force the pag data to be initialized. + * The AGF is read through xfs_alloc_read_agf and the buffer, if one was + * returned, is released again straight away. + */ +int /* error */ +xfs_alloc_pagf_init( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags) /* XFS_ALLOC_FLAG_... */ +{ + xfs_buf_t *bp; + int error; + + if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) + return error; + if (bp) + xfs_trans_brelse(tp, bp); + return 0; +} + +/* + * Put the block on the freelist for the allocation group. + * If agflbp is NULL the agfl buffer is read here. + */ +int /* error */ +xfs_alloc_put_freelist( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer for a.g. freelist header */ + xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + xfs_agblock_t bno) /* block being freed */ +{ + xfs_agf_t *agf; /* a.g. freespace structure */ + xfs_agfl_t *agfl; /* a.g. 
free block array */ + xfs_agblock_t *blockp;/* pointer to array entry */ + int error; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_put_freelist"; +#endif + xfs_mount_t *mp; /* mount structure */ + xfs_perag_t *pag; /* per allocation group data */ + + agf = XFS_BUF_TO_AGF(agbp); + mp = tp->t_mountp; + + if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp))) + return error; + agfl = XFS_BUF_TO_AGFL(agflbp); + INT_MOD(agf->agf_fllast, ARCH_CONVERT, 1); + if (INT_GET(agf->agf_fllast, ARCH_CONVERT) == XFS_AGFL_SIZE(mp)) + INT_ZERO(agf->agf_fllast, ARCH_CONVERT); + pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)]; + INT_MOD(agf->agf_flcount, ARCH_CONVERT, 1); + xfs_trans_agflist_delta(tp, 1); + pag->pagf_flcount++; + ASSERT(INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE(mp)); + blockp = &agfl->agfl_bno[INT_GET(agf->agf_fllast, ARCH_CONVERT)]; + INT_SET(*blockp, ARCH_CONVERT, bno); + TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); + xfs_trans_log_buf(tp, agflbp, + (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), + (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + + sizeof(xfs_agblock_t) - 1)); + return 0; +} + +/* + * Read in the allocation group header (free/alloc section). + * On a 0 return *bpp is the AGF buffer, or NULL if the read gave no + * buffer back (the buffer is requested with XFS_BUF_TRYLOCK when + * XFS_ALLOC_FLAG_TRYLOCK is set). A header that fails the consistency + * checks is released and EFSCORRUPTED is returned. The first + * successful read also fills in the in-core per-ag data. + */ +int /* error */ +xfs_alloc_read_agf( + xfs_mount_t *mp, /* mount point structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... 
*/ + xfs_buf_t **bpp) /* buffer for the ag freelist header */ +{ + xfs_agf_t *agf; /* ag freelist header */ + int agf_ok; /* set if agf is consistent */ + xfs_buf_t *bp; /* return value */ + xfs_perag_t *pag; /* per allocation group data */ + int error; + + ASSERT(agno != NULLAGNUMBER); + error = xfs_trans_read_buf( + mp, tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U, + &bp); + if (error) + return error; + ASSERT(!bp || !XFS_BUF_GETERROR(bp)); + if (!bp) { + *bpp = NULL; + return 0; + } + /* + * Validate the agf block: magic number, version, free block + * count against the a.g. length, and the freelist first/last + * indices and count against XFS_AGFL_SIZE. + */ + agf = XFS_BUF_TO_AGF(bp); + agf_ok = + INT_GET(agf->agf_magicnum, ARCH_CONVERT) == XFS_AGF_MAGIC && + XFS_AGF_GOOD_VERSION( + INT_GET(agf->agf_versionnum, ARCH_CONVERT)) && + INT_GET(agf->agf_freeblks, ARCH_CONVERT) <= + INT_GET(agf->agf_length, ARCH_CONVERT) && + INT_GET(agf->agf_flfirst, ARCH_CONVERT) < XFS_AGFL_SIZE(mp) && + INT_GET(agf->agf_fllast, ARCH_CONVERT) < XFS_AGFL_SIZE(mp) && + INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE(mp); + if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF))) { + XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", + XFS_ERRLEVEL_LOW, mp, agf); + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); + } + pag = &mp->m_perag[agno]; + if (!pag->pagf_init) { + pag->pagf_freeblks = INT_GET(agf->agf_freeblks, ARCH_CONVERT); + pag->pagf_flcount = INT_GET(agf->agf_flcount, ARCH_CONVERT); + pag->pagf_longest = INT_GET(agf->agf_longest, ARCH_CONVERT); + pag->pagf_levels[XFS_BTNUM_BNOi] = + INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT); + pag->pagf_levels[XFS_BTNUM_CNTi] = + INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT); + spinlock_init(&pag->pagb_lock, "xfspagb"); + pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * + sizeof(xfs_perag_busy_t), KM_SLEEP); + pag->pagf_init = 1; + } +#ifdef DEBUG + else if 
(!XFS_FORCED_SHUTDOWN(mp)) { + ASSERT(pag->pagf_freeblks == INT_GET(agf->agf_freeblks, ARCH_CONVERT)); + ASSERT(pag->pagf_flcount == INT_GET(agf->agf_flcount, ARCH_CONVERT)); + ASSERT(pag->pagf_longest == INT_GET(agf->agf_longest, ARCH_CONVERT)); + ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == + INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT)); + ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == + INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT)); + } +#endif + XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF); + *bpp = bp; + return 0; +} + +/* + * Allocate an extent (variable-size). + * Depending on the allocation type, we either look in a single allocation + * group or loop over the allocation groups to find the result. + * A 0 return does not by itself mean an extent was found: when nothing + * could be allocated, or the arguments fail the sanity checks below, + * args->fsbno is set to NULLFSBLOCK. + */ +int /* error */ +xfs_alloc_vextent( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_agblock_t agsize; /* allocation group size */ + int error; + int flags; /* XFS_ALLOC_FLAG_... locking flags */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_vextent"; +#endif + xfs_extlen_t minleft;/* minimum left value, temp copy */ + xfs_mount_t *mp; /* mount structure pointer */ + xfs_agnumber_t sagno; /* starting allocation group number */ + xfs_alloctype_t type; /* input allocation type */ + int bump_rotor = 0; + int no_min = 0; + + mp = args->mp; + type = args->otype = args->type; + args->agbno = NULLAGBLOCK; + /* + * Just fix this up, for the case where the last a.g. is shorter + * (or there's only one a.g.) and the caller couldn't easily figure + * that out (xfs_bmap_alloc). 
+ */ + agsize = mp->m_sb.sb_agblocks; + if (args->maxlen > agsize) + args->maxlen = agsize; + if (args->alignment == 0) + args->alignment = 1; + ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); + ASSERT(args->minlen <= args->maxlen); + ASSERT(args->minlen <= agsize); + ASSERT(args->mod < args->prod); + if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || + XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || + args->minlen > args->maxlen || args->minlen > agsize || + args->mod >= args->prod) { + args->fsbno = NULLFSBLOCK; + TRACE_ALLOC("badargs", args); + return 0; + } + minleft = args->minleft; + + switch (type) { + case XFS_ALLOCTYPE_THIS_AG: + case XFS_ALLOCTYPE_NEAR_BNO: + case XFS_ALLOCTYPE_THIS_BNO: + /* + * These three force us into a single a.g. + */ + args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + down_read(&mp->m_peraglock); + args->pag = &mp->m_perag[args->agno]; + args->minleft = 0; + error = xfs_alloc_fix_freelist(args, 0); + args->minleft = minleft; + if (error) { + TRACE_ALLOC("nofix", args); + goto error0; + } + if (!args->agbp) { + up_read(&mp->m_peraglock); + TRACE_ALLOC("noagbp", args); + break; + } + args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); + if ((error = xfs_alloc_ag_vextent(args))) + goto error0; + up_read(&mp->m_peraglock); + break; + case XFS_ALLOCTYPE_START_BNO: + /* + * Try near allocation first, then anywhere-in-ag after + * the first a.g. fails. + */ + if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && + (mp->m_flags & XFS_MOUNT_32BITINODES)) { + args->fsbno = XFS_AGB_TO_FSB(mp, mp->m_agfrotor, 0); + bump_rotor = 1; + } + args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; + /* FALLTHROUGH */ + case XFS_ALLOCTYPE_ANY_AG: + case XFS_ALLOCTYPE_START_AG: + case XFS_ALLOCTYPE_FIRST_AG: + /* + * Rotate through the allocation groups looking for a winner. 
+ */ + if (type == XFS_ALLOCTYPE_ANY_AG) { + /* + * Start with the last place we left off. + */ + args->agno = sagno = mp->m_agfrotor; + args->type = XFS_ALLOCTYPE_THIS_AG; + flags = XFS_ALLOC_FLAG_TRYLOCK; + } else if (type == XFS_ALLOCTYPE_FIRST_AG) { + /* + * Start with allocation group given by bno. + */ + args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + args->type = XFS_ALLOCTYPE_THIS_AG; + sagno = 0; + flags = 0; + } else { + if (type == XFS_ALLOCTYPE_START_AG) + args->type = XFS_ALLOCTYPE_THIS_AG; + /* + * Start with the given allocation group. + */ + args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); + flags = XFS_ALLOC_FLAG_TRYLOCK; + } + /* + * Loop over allocation groups, possibly several times: + * with trylock set first (when flags starts out that way), + * then without, and finally with minleft forced to 0 + * (no_min) before giving up. + */ + down_read(&mp->m_peraglock); + for (;;) { + args->pag = &mp->m_perag[args->agno]; + if (no_min) args->minleft = 0; + error = xfs_alloc_fix_freelist(args, flags); + args->minleft = minleft; + if (error) { + TRACE_ALLOC("nofix", args); + goto error0; + } + /* + * If we get a buffer back then the allocation will fly. + */ + if (args->agbp) { + if ((error = xfs_alloc_ag_vextent(args))) + goto error0; + break; + } + TRACE_ALLOC("loopfailed", args); + /* + * Didn't work, figure out the next iteration. + */ + if (args->agno == sagno && + type == XFS_ALLOCTYPE_START_BNO) + args->type = XFS_ALLOCTYPE_THIS_AG; + if (++(args->agno) == mp->m_sb.sb_agcount) + args->agno = 0; + /* + * Reached the starting a.g., must either be done + * or switch to non-trylock mode. 
+ */ + if (args->agno == sagno) { + if (no_min == 1) { + args->agbno = NULLAGBLOCK; + TRACE_ALLOC("allfailed", args); + break; + } + if (flags == 0) { + no_min = 1; + } else { + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; + } + } + } + } + up_read(&mp->m_peraglock); + if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) + mp->m_agfrotor = (args->agno + 1) % mp->m_sb.sb_agcount; + break; + default: + ASSERT(0); + /* NOTREACHED */ + } + if (args->agbno == NULLAGBLOCK) + args->fsbno = NULLFSBLOCK; + else { + args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); +#ifdef DEBUG + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(args->agbno % args->alignment == 0); + XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), + args->len); +#endif + } + return 0; +error0: + up_read(&mp->m_peraglock); + return error; +} + +/* + * Free an extent. + * Just break up the extent address and hand off to xfs_free_ag_extent + * after fixing up the freelist. + */ +int /* error */ +xfs_free_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_fsblock_t bno, /* starting block number of extent */ + xfs_extlen_t len) /* length of extent */ +{ +#ifdef DEBUG + xfs_agf_t *agf; /* a.g. 
freespace header */ +#endif + xfs_alloc_arg_t args; /* allocation argument structure; NOTE(review): declared without an initializer and only partly filled in below (userdata and total are not set) although xfs_alloc_fix_freelist tests args->userdata -- the outcome looks masked by flags == 0, confirm */ + int error; + + ASSERT(len != 0); + args.tp = tp; + args.mp = tp->t_mountp; + args.agno = XFS_FSB_TO_AGNO(args.mp, bno); + ASSERT(args.agno < args.mp->m_sb.sb_agcount); + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + args.alignment = 1; + args.minlen = args.minleft = args.minalignslop = 0; + down_read(&args.mp->m_peraglock); + args.pag = &args.mp->m_perag[args.agno]; + if ((error = xfs_alloc_fix_freelist(&args, 0))) + goto error0; +#ifdef DEBUG + ASSERT(args.agbp != NULL); + agf = XFS_BUF_TO_AGF(args.agbp); + ASSERT(args.agbno + len <= INT_GET(agf->agf_length, ARCH_CONVERT)); +#endif + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, + len, 0); +error0: + up_read(&args.mp->m_peraglock); + return error; +} + + +/* + * AG Busy list management + * The busy list contains block ranges that have been freed but whose + * transactions have not yet hit disk. If any block listed in a busy + * list is reused, the transaction that freed it must be forced to disk + * before continuing to use the block. 
+ * + * xfs_alloc_mark_busy - add to the per-ag busy list + * xfs_alloc_clear_busy - remove an item from the per-ag busy list + * (only if the slot is still owned by the given transaction) + * xfs_alloc_search_busy - look for a busy entry overlapping a range + */ +void +xfs_alloc_mark_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len) +{ + xfs_mount_t *mp; + xfs_perag_busy_t *bsy; + int n; + SPLDECL(s); + + mp = tp->t_mountp; + s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + + /* search pagb_list for an open slot */ + for (bsy = mp->m_perag[agno].pagb_list, n = 0; + n < XFS_PAGB_NUM_SLOTS; + bsy++, n++) { + if (bsy->busy_tp == NULL) { + break; + } + } + + if (n < XFS_PAGB_NUM_SLOTS) { + bsy = &mp->m_perag[agno].pagb_list[n]; + mp->m_perag[agno].pagb_count++; + TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp); + bsy->busy_start = bno; + bsy->busy_length = len; + bsy->busy_tp = tp; + xfs_trans_add_busy(tp, agno, n); + } else { + TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp); + /* + * The busy list is full! Since it is now not possible to + * track the free block, make this a synchronous transaction + * to ensure that the block is not reused before this + * transaction commits. 
+ */ + xfs_trans_set_sync(tp); + } + + mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); +} + +void +xfs_alloc_clear_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + int idx) +{ + xfs_mount_t *mp; + xfs_perag_busy_t *list; + SPLDECL(s); + + mp = tp->t_mountp; + + s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + list = mp->m_perag[agno].pagb_list; + + ASSERT(idx < XFS_PAGB_NUM_SLOTS); + if (list[idx].busy_tp == tp) { + TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp); + list[idx].busy_tp = NULL; + mp->m_perag[agno].pagb_count--; + } else { + TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp); + } + + mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); +} + + +/* + * Search the per-ag busy list for an entry overlapping (agno,bno):len. + * If one is found, the log is forced through the LSN of the transaction + * that freed it and the slot index (>= 0) is returned; otherwise the + * return value is -1. + */ +int +xfs_alloc_search_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len) +{ + xfs_mount_t *mp; + xfs_perag_busy_t *bsy; + int n; + xfs_agblock_t uend, bend; + xfs_lsn_t lsn; + int cnt; + SPLDECL(s); + + mp = tp->t_mountp; + + s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + cnt = mp->m_perag[agno].pagb_count; + + uend = bno + len - 1; + + /* search pagb_list for this slot, skipping open slots */ + for (bsy = mp->m_perag[agno].pagb_list, n = 0; + cnt; bsy++, n++) { + + /* + * The two ranges overlap unless one of them ends + * before the other starts; cnt counts the busy + * entries still to be examined. 
+ */ + if (bsy->busy_tp != NULL) { + bend = bsy->busy_start + bsy->busy_length - 1; + if ((bno > bend) || + (uend < bsy->busy_start)) { + cnt--; + } else { + TRACE_BUSYSEARCH("xfs_alloc_search_busy", + "found1", agno, bno, len, n, + tp); + break; + } + } + } + + /* + * If a block was found, force the log through the LSN of the + * transaction that freed the block + */ + if (cnt) { + TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); + lsn = bsy->busy_tp->t_commit_lsn; + mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); + } else { + TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", 
agno, bno, len, n, tp); + n = -1; + mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + } + + return n; +} diff --git a/sys/gnu/fs/xfs/xfs_alloc.h b/sys/gnu/fs/xfs/xfs_alloc.h new file mode 100644 index 000000000000..72329c86351c --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_alloc.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ALLOC_H__ +#define __XFS_ALLOC_H__ + +struct xfs_buf; +struct xfs_mount; +struct xfs_perag; +struct xfs_trans; + +/* + * Freespace allocation types. Argument to xfs_alloc_[v]extent. + */ +typedef enum xfs_alloctype +{ + XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ + XFS_ALLOCTYPE_FIRST_AG, /* ... 
start at ag 0 */ + XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ + XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */ + XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */ + XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */ + XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ +} xfs_alloctype_t; + +/* + * Flags for xfs_alloc_fix_freelist. + */ +#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ + +/* + * Argument structure for xfs_alloc routines. + * This is turned into a structure to avoid having 20 arguments passed + * down several levels of the stack. + */ +typedef struct xfs_alloc_arg { + struct xfs_trans *tp; /* transaction pointer */ + struct xfs_mount *mp; /* file system mount point */ + struct xfs_buf *agbp; /* buffer for a.g. freelist header */ + struct xfs_perag *pag; /* per-ag struct for this agno */ + xfs_fsblock_t fsbno; /* file system block number */ + xfs_agnumber_t agno; /* allocation group number */ + xfs_agblock_t agbno; /* allocation group-relative block # */ + xfs_extlen_t minlen; /* minimum size of extent */ + xfs_extlen_t maxlen; /* maximum size of extent */ + xfs_extlen_t mod; /* mod value for extent size */ + xfs_extlen_t prod; /* prod value for extent size */ + xfs_extlen_t minleft; /* min blocks must be left after us */ + xfs_extlen_t total; /* total blocks needed in xaction */ + xfs_extlen_t alignment; /* align answer to multiple of this */ + xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */ + xfs_extlen_t len; /* output: actual size of extent */ + xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... 
*/ + xfs_alloctype_t otype; /* original allocation type */ + char wasdel; /* set if allocation was prev delayed */ + char wasfromfl; /* set if allocation is from freelist */ + char isfl; /* set if is freelist blocks - !actg */ + char userdata; /* set if this is user data */ +} xfs_alloc_arg_t; + +/* + * Defines for userdata + */ +#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ +#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ + + +#ifdef __KERNEL__ + +#if defined(XFS_ALLOC_TRACE) +/* + * Allocation tracing buffer size. + */ +#define XFS_ALLOC_TRACE_SIZE 4096 +extern ktrace_t *xfs_alloc_trace_buf; + +/* + * Types for alloc tracing. + */ +#define XFS_ALLOC_KTRACE_ALLOC 1 +#define XFS_ALLOC_KTRACE_FREE 2 +#define XFS_ALLOC_KTRACE_MODAGF 3 +#define XFS_ALLOC_KTRACE_BUSY 4 +#define XFS_ALLOC_KTRACE_UNBUSY 5 +#define XFS_ALLOC_KTRACE_BUSYSEARCH 6 +#endif + +/* + * Compute and fill in value of m_ag_maxlevels. + */ +void +xfs_alloc_compute_maxlevels( + struct xfs_mount *mp); /* file system mount structure */ + +/* + * Get a block from the freelist. + * The block number is returned in *bnop, or NULLAGBLOCK if the freelist + * is empty; no buffer for the block itself is handed back. + */ +int /* error */ +xfs_alloc_get_freelist( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer containing the agf structure */ + xfs_agblock_t *bnop); /* block address retrieved from freelist */ + +/* + * Log the given fields from the agf structure. + */ +void +xfs_alloc_log_agf( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *bp, /* buffer for a.g. freelist header */ + int fields);/* mask of fields to be logged (XFS_AGF_...) */ + +/* + * Interface for inode allocation to force the pag data to be initialized. + */ +int /* error */ +xfs_alloc_pagf_init( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags); /* XFS_ALLOC_FLAGS_... 
*/ + +/* + * Put the block on the freelist for the allocation group. + */ +int /* error */ +xfs_alloc_put_freelist( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for a.g. freelist header */ + struct xfs_buf *agflbp,/* buffer for a.g. free block array */ + xfs_agblock_t bno); /* block being freed */ + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_alloc_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + struct xfs_buf **bpp); /* buffer for the ag freelist header */ + +/* + * Allocate an extent (variable-size). + */ +int /* error */ +xfs_alloc_vextent( + xfs_alloc_arg_t *args); /* allocation argument structure */ + +/* + * Free an extent. + */ +int /* error */ +xfs_free_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t bno, /* starting block number of extent */ + xfs_extlen_t len); /* length of extent */ + +void +xfs_alloc_mark_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len); + +void +xfs_alloc_clear_busy(xfs_trans_t *tp, + xfs_agnumber_t ag, + int idx); + + +#endif /* __KERNEL__ */ + +#endif /* __XFS_ALLOC_H__ */ diff --git a/sys/gnu/fs/xfs/xfs_alloc_btree.c b/sys/gnu/fs/xfs/xfs_alloc_btree.c new file mode 100644 index 000000000000..55c405cc5609 --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_alloc_btree.c @@ -0,0 +1,2204 @@ +/* + * Copyright (c) 2000-2001 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Free space allocation for XFS. + */ + +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_alloc.h" +#include "xfs_error.h" + +/* + * Prototypes for internal functions. 
+ */ + +STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); +STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); +STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); +STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); +STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); +STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); +STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); +STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, + xfs_alloc_key_t *, xfs_btree_cur_t **, int *); +STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int); + +/* + * Internal functions. + */ + +/* + * Single level of the xfs_alloc_delete record deletion routine. + * Delete record pointed to by cur/level. + * Remove the record from its block then rebalance the tree. + * Return 0 for error, 1 for done, 2 to go on to the next level. + */ +STATIC int /* error */ +xfs_alloc_delrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level removing record from */ + int *stat) /* fail/done/go-on */ +{ + xfs_agf_t *agf; /* allocation group freelist header */ + xfs_alloc_block_t *block; /* btree block record/key lives in */ + xfs_agblock_t bno; /* btree block number */ + xfs_buf_t *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop index */ + xfs_alloc_key_t key; /* kp points here if block is level 0 */ + xfs_agblock_t lbno; /* left block's block number */ + xfs_buf_t *lbp; /* left block's buffer pointer */ + xfs_alloc_block_t *left; /* left btree block */ + xfs_alloc_key_t *lkp=NULL; /* left block key pointer */ + xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */ + int lrecs=0; /* number of records in left block */ + xfs_alloc_rec_t *lrp; /* left block record pointer */ + xfs_mount_t *mp; /* mount structure */ + int ptr; /* index in btree block for this rec */ + xfs_agblock_t rbno; /* right block's block number */ + 
xfs_buf_t *rbp; /* right block's buffer pointer */ + xfs_alloc_block_t *right; /* right btree block */ + xfs_alloc_key_t *rkp; /* right block key pointer */ + xfs_alloc_ptr_t *rpp; /* right block address pointer */ + int rrecs=0; /* number of records in right block */ + xfs_alloc_rec_t *rrp; /* right block record pointer */ + xfs_btree_cur_t *tcur; /* temporary btree cursor */ + + /* + * Get the index of the entry being deleted, check for nothing there. + */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + *stat = 0; + return 0; + } + /* + * Get the buffer & block containing the record or key/ptr. + */ + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, block, level, bp))) + return error; +#endif + /* + * Fail if we're off the end of the block. + */ + if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + *stat = 0; + return 0; + } + XFS_STATS_INC(xs_abt_delrec); + /* + * It's a nonleaf. Excise the key and ptr being deleted, by + * sliding the entries past them down one. + * Log the changed areas of the block. + */ + if (level > 0) { + lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur); + lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur); +#ifdef DEBUG + for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) { + if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))) + return error; + } +#endif + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + memmove(&lkp[ptr - 1], &lkp[ptr], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */ + memmove(&lpp[ptr - 1], &lpp[ptr], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */ + xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + } + } + /* + * It's a leaf. Excise the record being deleted, by sliding the + * entries past it down one. 
Log the changed areas of the block. + */ + else { + lrp = XFS_ALLOC_REC_ADDR(block, 1, cur); + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + memmove(&lrp[ptr - 1], &lrp[ptr], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp)); + xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + } + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + if (ptr == 1) { + key.ar_startblock = lrp->ar_startblock; /* INT_: direct copy */ + key.ar_blockcount = lrp->ar_blockcount; /* INT_: direct copy */ + lkp = &key; + } + } + /* + * Decrement and log the number of entries in the block. + */ + INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1); + xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); + /* + * See if the longest free extent in the allocation group was + * changed by this operation. True if it's the by-size btree, and + * this is the leaf level, and there is no right sibling block, + * and this was the last record. + */ + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + mp = cur->bc_mp; + + if (level == 0 && + cur->bc_btnum == XFS_BTNUM_CNT && + INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK && + ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + ASSERT(ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT) + 1); + /* + * There are still records in the block. Grab the size + * from the last one. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + rrp = XFS_ALLOC_REC_ADDR(block, INT_GET(block->bb_numrecs, ARCH_CONVERT), cur); + INT_COPY(agf->agf_longest, rrp->ar_blockcount, ARCH_CONVERT); + } + /* + * No free extents left. + */ + else + INT_ZERO(agf->agf_longest, ARCH_CONVERT); + mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest = + INT_GET(agf->agf_longest, ARCH_CONVERT); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_LONGEST); + } + /* + * Is this the root level? If so, we're almost done. 
+ */ + if (level == cur->bc_nlevels - 1) { + /* + * If this is the root level, + * and there's only one entry left, + * and it's NOT the leaf level, + * then we can get rid of this level. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) { + /* + * lpp is still set to the first pointer in the block. + * Make it the new root of the btree. + */ + bno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT); + INT_COPY(agf->agf_roots[cur->bc_btnum], *lpp, ARCH_CONVERT); + INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, -1); + mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_levels[cur->bc_btnum]--; + /* + * Put this buffer/block on the ag's freelist. + */ + if ((error = xfs_alloc_put_freelist(cur->bc_tp, + cur->bc_private.a.agbp, NULL, bno))) + return error; + /* + * Since blocks move to the free list without the + * coordination used in xfs_bmap_finish, we can't allow + * block to be available for reallocation and + * non-transaction writing (user data) until we know + * that the transaction that moved it to the free list + * is permanently on disk. We track the blocks by + * declaring these blocks as "busy"; the busy list is + * maintained on a per-ag basis and each transaction + * records which entries should be removed when the + * iclog commits to disk. If a busy block is + * allocated, the iclog is pushed up to the LSN + * that freed the block. + */ + xfs_alloc_mark_busy(cur->bc_tp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1); + + xfs_trans_agbtree_delta(cur->bc_tp, -1); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_ROOTS | XFS_AGF_LEVELS); + /* + * Update the cursor so there's one fewer level. + */ + xfs_btree_setbuf(cur, level, 0); + cur->bc_nlevels--; + } else if (level > 0 && + (error = xfs_alloc_decrement(cur, level, &i))) + return error; + *stat = 1; + return 0; + } + /* + * If we deleted the leftmost entry in the block, update the + * key values above us in the tree. 
+ */ + if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1))) + return error; + /* + * If the number of records remaining in the block is at least + * the minimum, we're done. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + return error; + *stat = 1; + return 0; + } + /* + * Otherwise, we have to move some records around to keep the + * tree balanced. Look at the left and right sibling blocks to + * see if we can re-balance by moving only one record. + */ + rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT); + lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT); + bno = NULLAGBLOCK; + ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK); + /* + * Duplicate the cursor so our btree manipulations here won't + * disrupt the next level up. + */ + if ((error = xfs_btree_dup_cursor(cur, &tcur))) + return error; + /* + * If there's a right sibling, see if it's ok to shift an entry + * out of it. + */ + if (rbno != NULLAGBLOCK) { + /* + * Move the temp cursor to the last entry in the next block. + * Actually any entry but the first would suffice. + */ + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_increment(tcur, level, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Grab a pointer to the block. + */ + rbp = tcur->bc_bufs[level]; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) + goto error0; +#endif + /* + * Grab the current block number, for future use. + */ + bno = INT_GET(right->bb_leftsib, ARCH_CONVERT); + /* + * If right block is full enough so that removing one entry + * won't make it too empty, and left-shifting an entry out + * of right to us works, we're done. 
+ */ + if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if ((error = xfs_alloc_lshift(tcur, level, &i))) + goto error0; + if (i) { + ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)); + xfs_btree_del_cursor(tcur, + XFS_BTREE_NOERROR); + if (level > 0 && + (error = xfs_alloc_decrement(cur, level, + &i))) + return error; + *stat = 1; + return 0; + } + } + /* + * Otherwise, grab the number of records in right for + * future reference, and fix up the temp cursor to point + * to our block again (last record). + */ + rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT); + if (lbno != NULLAGBLOCK) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_decrement(tcur, level, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + /* + * If there's a left sibling, see if it's ok to shift an entry + * out of it. + */ + if (lbno != NULLAGBLOCK) { + /* + * Move the temp cursor to the first entry in the + * previous block. + */ + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_decrement(tcur, level, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_btree_firstrec(tcur, level); + /* + * Grab a pointer to the block. + */ + lbp = tcur->bc_bufs[level]; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) + goto error0; +#endif + /* + * Grab the current block number, for future use. + */ + bno = INT_GET(left->bb_rightsib, ARCH_CONVERT); + /* + * If left block is full enough so that removing one entry + * won't make it too empty, and right-shifting an entry out + * of left to us works, we're done. 
+ */ + if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if ((error = xfs_alloc_rshift(tcur, level, &i))) + goto error0; + if (i) { + ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)); + xfs_btree_del_cursor(tcur, + XFS_BTREE_NOERROR); + if (level == 0) + cur->bc_ptrs[0]++; + *stat = 1; + return 0; + } + } + /* + * Otherwise, grab the number of records in left for + * future reference (lrecs feeds the join test below). + */ + lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT); + } + /* + * Delete the temp cursor, we're done with it. + * From here on errors are returned directly rather than via + * error0, since error0 would delete tcur a second time. + */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + /* + * If here, we need to do a join to keep the tree balanced. + * bno was read from a sibling's back-pointer above, so it + * holds the block number of the block we started in. + */ + ASSERT(bno != NULLAGBLOCK); + /* + * See if we can join with the left neighbor block. + */ + if (lbno != NULLAGBLOCK && + lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + /* + * Set "right" to be the starting block, + * "left" to be the left neighbor. + * After this rbno == bno: the starting block is the one + * that will be emptied into "left". + */ + rbno = bno; + right = block; + rbp = bp; + if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, lbno, 0, &lbp, + XFS_ALLOC_BTREE_REF))) + return error; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); + if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) + return error; + } + /* + * If that won't work, see if we can join with the right neighbor block. + */ + else if (rbno != NULLAGBLOCK && + rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= + XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + /* + * Set "left" to be the starting block, + * "right" to be the right neighbor. + * rbno keeps the right sibling's number, so here + * rbno != bno: the starting block survives the join. + */ + lbno = bno; + left = block; + lbp = bp; + if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, rbno, 0, &rbp, + XFS_ALLOC_BTREE_REF))) + return error; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); + if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) + return error; + } + /* + * Otherwise, we can't fix the imbalance. + * Just return. 
This is probably a logic error, but it's not fatal. + */ + else { + if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + return error; + *stat = 1; + return 0; + } + /* + * We're now going to join "left" and "right" by moving all the stuff + * in "right" to "left" and deleting "right". + */ + if (level > 0) { + /* + * It's a non-leaf. Move keys and pointers. + */ + lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); + rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); +#ifdef DEBUG + for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) { + if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) + return error; + } +#endif + memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */ + memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */ + xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } else { + /* + * It's a leaf. Move records. + */ + lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); + memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp)); + xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } + /* + * If we joined with the left neighbor, set the buffer in the + * cursor to the left block, and fix up the index. 
+ */ + if (bp != lbp) { + xfs_btree_setbuf(cur, level, lbp); + cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT); + } + /* + * If we joined with the right neighbor and there's a level above + * us, increment the cursor at that level. + */ + else if (level + 1 < cur->bc_nlevels && + (error = xfs_alloc_increment(cur, level + 1, &i))) + return error; + /* + * Fix up the number of records in the surviving block. + */ + INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + /* + * Fix up the right block pointer in the surviving block, and log it. + */ + left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */ + xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + /* + * If there is a right sibling now, make it point to the + * remaining block. + */ + if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_alloc_block_t *rrblock; + xfs_buf_t *rrbp; + + if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, + &rrbp, XFS_ALLOC_BTREE_REF))) + return error; + rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); + if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) + return error; + INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno); + xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); + } + /* + * Free the deleting block by putting it on the freelist. + */ + if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp, + NULL, rbno))) + return error; + /* + * Since blocks move to the free list without the coordination + * used in xfs_bmap_finish, we can't allow block to be available + * for reallocation and non-transaction writing (user data) + * until we know that the transaction that moved it to the free + * list is permanently on disk. 
We track the blocks by declaring + * these blocks as "busy"; the busy list is maintained on a + * per-ag basis and each transaction records which entries + * should be removed when the iclog commits to disk. If a + * busy block is allocated, the iclog is pushed up to the + * LSN that freed the block. + */ + xfs_alloc_mark_busy(cur->bc_tp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1); + + xfs_trans_agbtree_delta(cur->bc_tp, -1); + /* + * Adjust the current level's cursor so that we're left referring + * to the right node, after we're done. + * If this leaves the ptr value 0 our caller will fix it up. + */ + if (level > 0) + cur->bc_ptrs[level]--; + /* + * Return value means the next level up has something to do. + */ + *stat = 2; + return 0; + +error0: + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Insert one record/level. Return information to the caller + * allowing the next level up to proceed if necessary. + */ +STATIC int /* error */ +xfs_alloc_insrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level to insert record at */ + xfs_agblock_t *bnop, /* i/o: block number inserted */ + xfs_alloc_rec_t *recp, /* i/o: record data inserted */ + xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ + int *stat) /* output: success/failure */ +{ + xfs_agf_t *agf; /* allocation group freelist header */ + xfs_alloc_block_t *block; /* btree block record/key lives in */ + xfs_buf_t *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop index */ + xfs_alloc_key_t key; /* key value being inserted */ + xfs_alloc_key_t *kp; /* pointer to btree keys */ + xfs_agblock_t nbno; /* block number of allocated block */ + xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ + xfs_alloc_key_t nkey; /* new key value, from split */ + xfs_alloc_rec_t nrec; /* new record value, for caller */ + int optr; /* old ptr value */ + xfs_alloc_ptr_t *pp; /* pointer to btree addresses */ + int ptr; /* index in 
btree block for this rec */ + xfs_alloc_rec_t *rp; /* pointer to btree records */ + + ASSERT(INT_GET(recp->ar_blockcount, ARCH_CONVERT) > 0); + /* + * If we made it to the root level, allocate a new root block + * and we're done. + */ + if (level >= cur->bc_nlevels) { + XFS_STATS_INC(xs_abt_insrec); + if ((error = xfs_alloc_newroot(cur, &i))) + return error; + *bnop = NULLAGBLOCK; + *stat = i; + return 0; + } + /* + * Make a key out of the record data to be inserted, and save it. + */ + key.ar_startblock = recp->ar_startblock; /* INT_: direct copy */ + key.ar_blockcount = recp->ar_blockcount; /* INT_: direct copy */ + optr = ptr = cur->bc_ptrs[level]; + /* + * If we're off the left edge, return failure. + */ + if (ptr == 0) { + *stat = 0; + return 0; + } + XFS_STATS_INC(xs_abt_insrec); + /* + * Get pointers to the btree buffer and block. + */ + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, block, level, bp))) + return error; + /* + * Check that the new entry is being inserted in the right place. + */ + if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + if (level == 0) { + rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); + xfs_btree_check_rec(cur->bc_btnum, recp, rp); + } else { + kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); + xfs_btree_check_key(cur->bc_btnum, &key, kp); + } + } +#endif + nbno = NULLAGBLOCK; + ncur = (xfs_btree_cur_t *)0; + /* + * If the block is full, we can't insert the new entry until we + * make the block un-full. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + /* + * First, try shifting an entry to the right neighbor. + */ + if ((error = xfs_alloc_rshift(cur, level, &i))) + return error; + if (i) { + /* nothing */ + } + /* + * Next, try shifting an entry to the left neighbor. 
+ */ + else { + if ((error = xfs_alloc_lshift(cur, level, &i))) + return error; + if (i) + optr = ptr = cur->bc_ptrs[level]; + else { + /* + * Next, try splitting the current block in + * half. If this works we have to re-set our + * variables because we could be in a + * different block now. + */ + if ((error = xfs_alloc_split(cur, level, &nbno, + &nkey, &ncur, &i))) + return error; + if (i) { + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if ((error = + xfs_btree_check_sblock(cur, + block, level, bp))) + return error; +#endif + ptr = cur->bc_ptrs[level]; + nrec.ar_startblock = nkey.ar_startblock; /* INT_: direct copy */ + nrec.ar_blockcount = nkey.ar_blockcount; /* INT_: direct copy */ + } + /* + * Otherwise the insert fails. + */ + else { + *stat = 0; + return 0; + } + } + } + } + /* + * At this point we know there's room for our new entry in the block + * we're pointing at. + */ + if (level > 0) { + /* + * It's a non-leaf entry. Make a hole for the new data + * in the key and ptr regions of the block. + */ + kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); + pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); +#ifdef DEBUG + for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) { + if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))) + return error; + } +#endif + memmove(&kp[ptr], &kp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */ + memmove(&pp[ptr], &pp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */ +#ifdef DEBUG + if ((error = xfs_btree_check_sptr(cur, *bnop, level))) + return error; +#endif + /* + * Now stuff the new data in, bump numrecs and log the new data. 
+ */ + kp[ptr - 1] = key; + INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop); + INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1); + xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); + xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); +#ifdef DEBUG + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) + xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, + kp + ptr); +#endif + } else { + /* + * It's a leaf entry. Make a hole for the new record. + */ + rp = XFS_ALLOC_REC_ADDR(block, 1, cur); + memmove(&rp[ptr], &rp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp)); + /* + * Now stuff the new record in, bump numrecs + * and log the new data. + */ + rp[ptr - 1] = *recp; /* INT_: struct copy */ + INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1); + xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); +#ifdef DEBUG + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) + xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, + rp + ptr); +#endif + } + /* + * Log the new number of records in the btree header. + */ + xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); + /* + * If we inserted at the start of a block, update the parents' keys. + */ + if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1))) + return error; + /* + * Look to see if the longest extent in the allocation group + * needs to be updated. + */ + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + if (level == 0 && + cur->bc_btnum == XFS_BTNUM_CNT && + INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK && + INT_GET(recp->ar_blockcount, ARCH_CONVERT) > INT_GET(agf->agf_longest, ARCH_CONVERT)) { + /* + * If this is a leaf in the by-size btree and there + * is no right sibling block and this block is bigger + * than the previous longest block, update it. 
+ */ + INT_COPY(agf->agf_longest, recp->ar_blockcount, ARCH_CONVERT); + cur->bc_mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest + = INT_GET(recp->ar_blockcount, ARCH_CONVERT); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_LONGEST); + } + /* + * Return the new block number, if any. + * If there is one, give back a record value and a cursor too. + * nbno stays NULLAGBLOCK unless the split path above set it, + * in which case nrec and ncur were filled in there as well. + */ + *bnop = nbno; + if (nbno != NULLAGBLOCK) { + *recp = nrec; /* INT_: struct copy */ + *curp = ncur; /* pointer copy: hand the new cursor to the caller */ + } + *stat = 1; + return 0; +} + +/* + * Log header fields from a btree block. + * + * The offsets table holds the byte offset of each header field + * (magic, level, numrecs, leftsib, rightsib) followed by the total + * header size. xfs_btree_offsets() turns the field mask into the + * first/last byte offsets that are then passed to xfs_trans_log_buf(). + * NOTE(review): presumably table entry N corresponds to bit N of the + * XFS_BB_... mask -- confirm against xfs_btree_offsets(). + */ +STATIC void +xfs_alloc_log_block( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* buffer containing btree block */ + int fields) /* mask of fields: XFS_BB_... */ +{ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + static const short offsets[] = { /* table of offsets */ + offsetof(xfs_alloc_block_t, bb_magic), + offsetof(xfs_alloc_block_t, bb_level), + offsetof(xfs_alloc_block_t, bb_numrecs), + offsetof(xfs_alloc_block_t, bb_leftsib), + offsetof(xfs_alloc_block_t, bb_rightsib), + sizeof(xfs_alloc_block_t) + }; + + xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); + xfs_trans_log_buf(tp, bp, first, last); +} + +/* + * Log keys from a btree block (nonleaf). 
+ */ +STATIC void +xfs_alloc_log_keys( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_buf_t *bp, /* buffer containing btree block */ + int kfirst, /* index of first key to log (1-based) */ + int klast) /* index of last key to log (1-based, inclusive) */ +{ + xfs_alloc_block_t *block; /* btree block to log from */ + int first; /* first byte offset logged */ + xfs_alloc_key_t *kp; /* key pointer in btree block */ + int last; /* last byte offset logged */ + + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); /* kp[0] is key number 1 */ + first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); /* offset of first byte of key kfirst */ + last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); /* offset of last byte of key klast */ + xfs_trans_log_buf(cur->bc_tp, bp, first, last); +} + +/* + * Log block pointer fields from a btree block (nonleaf). + * + * pfirst and plast are 1-based, inclusive pointer indices. They are + * converted to the byte offsets, relative to the start of the block, + * of the first byte of pointer pfirst and the last byte of pointer + * plast, and that range is handed to xfs_trans_log_buf(). + */ +STATIC void +xfs_alloc_log_ptrs( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_buf_t *bp, /* buffer containing btree block */ + int pfirst, /* index of first pointer to log (1-based) */ + int plast) /* index of last pointer to log (1-based, inclusive) */ +{ + xfs_alloc_block_t *block; /* btree block to log from */ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */ + + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); /* pp[0] is pointer number 1 */ + first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); /* offset of first byte of pointer pfirst */ + last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); /* offset of last byte of pointer plast */ + xfs_trans_log_buf(cur->bc_tp, bp, first, last); +} + +/* + * Log records from a btree block (leaf). 
+ */ +STATIC void +xfs_alloc_log_recs( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_buf_t *bp, /* buffer containing btree block */ + int rfirst, /* index of first record to log */ + int rlast) /* index of last record to log */ +{ + xfs_alloc_block_t *block; /* btree block to log from */ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + xfs_alloc_rec_t *rp; /* record pointer for btree block */ + + + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + rp = XFS_ALLOC_REC_ADDR(block, 1, cur); +#ifdef DEBUG + { + xfs_agf_t *agf; + xfs_alloc_rec_t *p; + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++) + ASSERT(INT_GET(p->ar_startblock, ARCH_CONVERT) + INT_GET(p->ar_blockcount, ARCH_CONVERT) <= + INT_GET(agf->agf_length, ARCH_CONVERT)); + } +#endif + first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); + last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); + xfs_trans_log_buf(cur->bc_tp, bp, first, last); +} + +/* + * Lookup the record. The cursor is made to point to it, based on dir. + * Return 0 if can't find any such record, 1 for success. + */ +STATIC int /* error */ +xfs_alloc_lookup( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_lookup_t dir, /* <=, ==, or >= */ + int *stat) /* success/failure */ +{ + xfs_agblock_t agbno; /* a.g. relative btree block number */ + xfs_agnumber_t agno; /* allocation group number */ + xfs_alloc_block_t *block=NULL; /* current btree block */ + int diff; /* difference for the current key */ + int error; /* error return value */ + int keyno=0; /* current key number */ + int level; /* level in the btree */ + xfs_mount_t *mp; /* file system mount point */ + + XFS_STATS_INC(xs_abt_lookup); + /* + * Get the allocation group header, and the root block number. + */ + mp = cur->bc_mp; + + { + xfs_agf_t *agf; /* a.g. 
freespace header */ + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + agno = INT_GET(agf->agf_seqno, ARCH_CONVERT); + agbno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT); + } + /* + * Iterate over each level in the btree, starting at the root. + * For each level above the leaves, find the key we need, based + * on the lookup record, then follow the corresponding block + * pointer down to the next level. + */ + for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { + xfs_buf_t *bp; /* buffer pointer for btree block */ + xfs_daddr_t d; /* disk address of btree block */ + + /* + * Get the disk address we're looking for. + */ + d = XFS_AGB_TO_DADDR(mp, agno, agbno); + /* + * If the old buffer at this level is for a different block, + * throw it away, otherwise just use it. + */ + bp = cur->bc_bufs[level]; + if (bp && XFS_BUF_ADDR(bp) != d) + bp = (xfs_buf_t *)0; + if (!bp) { + /* + * Need to get a new buffer. Read it, then + * set it in the cursor, releasing the old one. + */ + if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno, + agbno, 0, &bp, XFS_ALLOC_BTREE_REF))) + return error; + xfs_btree_setbuf(cur, level, bp); + /* + * Point to the btree block, now that we have the buffer + */ + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + if ((error = xfs_btree_check_sblock(cur, block, level, + bp))) + return error; + } else + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + /* + * If we already had a key match at a higher level, we know + * we need to use the first entry in this block. + */ + if (diff == 0) + keyno = 1; + /* + * Otherwise we need to search this block. Do a binary search. + */ + else { + int high; /* high entry number */ + xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */ + xfs_alloc_rec_t *krbase=NULL;/* base of records in block */ + int low; /* low entry number */ + + /* + * Get a pointer to keys or records. 
+ */ + if (level > 0) + kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur); + else + krbase = XFS_ALLOC_REC_ADDR(block, 1, cur); + /* + * Set low and high entry numbers, 1-based. + */ + low = 1; + if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) { + /* + * If the block is empty, the tree must + * be an empty leaf. + */ + ASSERT(level == 0 && cur->bc_nlevels == 1); + cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + *stat = 0; + return 0; + } + /* + * Binary search the block. + */ + while (low <= high) { + xfs_extlen_t blockcount; /* key value */ + xfs_agblock_t startblock; /* key value */ + + XFS_STATS_INC(xs_abt_compare); + /* + * keyno is average of low and high. + */ + keyno = (low + high) >> 1; + /* + * Get startblock & blockcount. + */ + if (level > 0) { + xfs_alloc_key_t *kkp; + + kkp = kkbase + keyno - 1; + startblock = INT_GET(kkp->ar_startblock, ARCH_CONVERT); + blockcount = INT_GET(kkp->ar_blockcount, ARCH_CONVERT); + } else { + xfs_alloc_rec_t *krp; + + krp = krbase + keyno - 1; + startblock = INT_GET(krp->ar_startblock, ARCH_CONVERT); + blockcount = INT_GET(krp->ar_blockcount, ARCH_CONVERT); + } + /* + * Compute difference to get next direction. + */ + if (cur->bc_btnum == XFS_BTNUM_BNO) + diff = (int)startblock - + (int)cur->bc_rec.a.ar_startblock; + else if (!(diff = (int)blockcount - + (int)cur->bc_rec.a.ar_blockcount)) + diff = (int)startblock - + (int)cur->bc_rec.a.ar_startblock; + /* + * Less than, move right. + */ + if (diff < 0) + low = keyno + 1; + /* + * Greater than, move left. + */ + else if (diff > 0) + high = keyno - 1; + /* + * Equal, we're done. + */ + else + break; + } + } + /* + * If there are more levels, set up for the next level + * by getting the block number and filling in the cursor. + */ + if (level > 0) { + /* + * If we moved left, need the previous key number, + * unless there isn't one. 
+ */ + if (diff > 0 && --keyno < 1) + keyno = 1; + agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, keyno, cur), ARCH_CONVERT); +#ifdef DEBUG + if ((error = xfs_btree_check_sptr(cur, agbno, level))) + return error; +#endif + cur->bc_ptrs[level] = keyno; + } + } + /* + * Done with the search. + * See if we need to adjust the results. + */ + if (dir != XFS_LOOKUP_LE && diff < 0) { + keyno++; + /* + * If ge search and we went off the end of the block, but it's + * not the last block, we're in the wrong block. + */ + if (dir == XFS_LOOKUP_GE && + keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) && + INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + int i; + + cur->bc_ptrs[0] = keyno; + if ((error = xfs_alloc_increment(cur, 0, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + *stat = 1; + return 0; + } + } + else if (dir == XFS_LOOKUP_LE && diff > 0) + keyno--; + cur->bc_ptrs[0] = keyno; + /* + * Return if we succeeded or not. + */ + if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT)) + *stat = 0; + else + *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); + return 0; +} + +/* + * Move 1 record left from cur/level if possible. + * Update cur to reflect the new path. 
+ */ +STATIC int /* error */ +xfs_alloc_lshift( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level to shift record on */ + int *stat) /* success/failure */ +{ + int error; /* error return value */ +#ifdef DEBUG + int i; /* loop index */ +#endif + xfs_alloc_key_t key; /* key value for leaf level upward */ + xfs_buf_t *lbp; /* buffer for left neighbor block */ + xfs_alloc_block_t *left; /* left neighbor btree block */ + int nrec; /* new number of left block entries */ + xfs_buf_t *rbp; /* buffer for right (current) block */ + xfs_alloc_block_t *right; /* right (current) btree block */ + xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */ + xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */ + xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */ + + /* + * Set up variables for this block as "right". + */ + rbp = cur->bc_bufs[level]; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) + return error; +#endif + /* + * If we've got no left sibling then we can't shift an entry left. + */ + if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) { + *stat = 0; + return 0; + } + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + if (cur->bc_ptrs[level] <= 1) { + *stat = 0; + return 0; + } + /* + * Set up the left neighbor as "left". + */ + if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp, + XFS_ALLOC_BTREE_REF))) + return error; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); + if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) + return error; + /* + * If it's full, it can't take another entry. 
+ */ + if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + *stat = 0; + return 0; + } + nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1; + /* + * If non-leaf, copy a key and a ptr to the left block. + */ + if (level > 0) { + xfs_alloc_key_t *lkp; /* key pointer for left block */ + xfs_alloc_ptr_t *lpp; /* address pointer for left block */ + + lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur); + rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); + *lkp = *rkp; + xfs_alloc_log_keys(cur, lbp, nrec, nrec); + lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur); + rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); +#ifdef DEBUG + if ((error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))) + return error; +#endif + *lpp = *rpp; /* INT_: copy */ + xfs_alloc_log_ptrs(cur, lbp, nrec, nrec); + xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); + } + /* + * If leaf, copy a record to the left block. + */ + else { + xfs_alloc_rec_t *lrp; /* record pointer for left block */ + + lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur); + rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); + *lrp = *rrp; + xfs_alloc_log_recs(cur, lbp, nrec, nrec); + xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); + } + /* + * Bump and log left's numrecs, decrement and log right's numrecs. + */ + INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1); + xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); + INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1); + xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); + /* + * Slide the contents of right down one entry. 
+ */ + if (level > 0) { +#ifdef DEBUG + for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) { + if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT), + level))) + return error; + } +#endif + memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); + memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); + xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } else { + memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp)); + xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */ + key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */ + rkp = &key; + } + /* + * Update the parent key values of right. + */ + if ((error = xfs_alloc_updkey(cur, rkp, level + 1))) + return error; + /* + * Slide the cursor value left one. + */ + cur->bc_ptrs[level]--; + *stat = 1; + return 0; +} + +/* + * Allocate a new root block, fill it in. + */ +STATIC int /* error */ +xfs_alloc_newroot( + xfs_btree_cur_t *cur, /* btree cursor */ + int *stat) /* success/failure */ +{ + int error; /* error return value */ + xfs_agblock_t lbno; /* left block number */ + xfs_buf_t *lbp; /* left btree buffer */ + xfs_alloc_block_t *left; /* left btree block */ + xfs_mount_t *mp; /* mount structure */ + xfs_agblock_t nbno; /* new block number */ + xfs_buf_t *nbp; /* new (root) buffer */ + xfs_alloc_block_t *new; /* new (root) btree block */ + int nptr; /* new value for key index, 1 or 2 */ + xfs_agblock_t rbno; /* right block number */ + xfs_buf_t *rbp; /* right btree buffer */ + xfs_alloc_block_t *right; /* right btree block */ + + mp = cur->bc_mp; + + ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp)); + /* + * Get a buffer from the freelist blocks, for the new root. 
+ */ + if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + &nbno))) + return error; + /* + * None available, we fail. + */ + if (nbno == NULLAGBLOCK) { + *stat = 0; + return 0; + } + xfs_trans_agbtree_delta(cur->bc_tp, 1); + nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno, + 0); + new = XFS_BUF_TO_ALLOC_BLOCK(nbp); + /* + * Set the root data in the a.g. freespace structure. + */ + { + xfs_agf_t *agf; /* a.g. freespace header */ + xfs_agnumber_t seqno; + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + INT_SET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT, nbno); + INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, 1); + seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT); + mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++; + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_ROOTS | XFS_AGF_LEVELS); + } + /* + * At the previous root level there are now two blocks: the old + * root, and the new block generated when it was split. + * We don't know which one the cursor is pointing at, so we + * set up variables "left" and "right" for each case. + */ + lbp = cur->bc_bufs[cur->bc_nlevels - 1]; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))) + return error; +#endif + if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + /* + * Our block is left, pick up the right block. + */ + lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp)); + rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT); + if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, rbno, 0, &rbp, + XFS_ALLOC_BTREE_REF))) + return error; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); + if ((error = xfs_btree_check_sblock(cur, right, + cur->bc_nlevels - 1, rbp))) + return error; + nptr = 1; + } else { + /* + * Our block is right, pick up the left block. 
+ */ + rbp = lbp; + right = left; + rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp)); + lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT); + if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, lbno, 0, &lbp, + XFS_ALLOC_BTREE_REF))) + return error; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); + if ((error = xfs_btree_check_sblock(cur, left, + cur->bc_nlevels - 1, lbp))) + return error; + nptr = 2; + } + /* + * Fill in the new block's btree header and log it. + */ + INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]); + INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels); + INT_SET(new->bb_numrecs, ARCH_CONVERT, 2); + INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK); + INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK); + xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS); + ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK); + /* + * Fill in the key data in the new root. + */ + { + xfs_alloc_key_t *kp; /* btree key pointer */ + + kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); + if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) { + kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */ + kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */ + } else { + xfs_alloc_rec_t *rp; /* btree record pointer */ + + rp = XFS_ALLOC_REC_ADDR(left, 1, cur); + kp[0].ar_startblock = rp->ar_startblock; /* INT_: direct copy */ + kp[0].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */ + rp = XFS_ALLOC_REC_ADDR(right, 1, cur); + kp[1].ar_startblock = rp->ar_startblock; /* INT_: direct copy */ + kp[1].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */ + } + } + xfs_alloc_log_keys(cur, nbp, 1, 2); + /* + * Fill in the pointer data in the new root. + */ + { + xfs_alloc_ptr_t *pp; /* btree address pointer */ + + pp = XFS_ALLOC_PTR_ADDR(new, 1, cur); + INT_SET(pp[0], ARCH_CONVERT, lbno); + INT_SET(pp[1], ARCH_CONVERT, rbno); + } + xfs_alloc_log_ptrs(cur, nbp, 1, 2); + /* + * Fix up the cursor. 
+ */ + xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); + cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_nlevels++; + *stat = 1; + return 0; +} + +/* + * Move 1 record right from cur/level if possible. + * Update cur to reflect the new path. + */ +STATIC int /* error */ +xfs_alloc_rshift( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level to shift record on */ + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int i; /* loop index */ + xfs_alloc_key_t key; /* key value for leaf level upward */ + xfs_buf_t *lbp; /* buffer for left (current) block */ + xfs_alloc_block_t *left; /* left (current) btree block */ + xfs_buf_t *rbp; /* buffer for right neighbor block */ + xfs_alloc_block_t *right; /* right neighbor btree block */ + xfs_alloc_key_t *rkp; /* key pointer for right block */ + xfs_btree_cur_t *tcur; /* temporary cursor */ + + /* + * Set up variables for this block as "left". + */ + lbp = cur->bc_bufs[level]; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) + return error; +#endif + /* + * If we've got no right sibling then we can't shift an entry right. + */ + if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) { + *stat = 0; + return 0; + } + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) { + *stat = 0; + return 0; + } + /* + * Set up the right neighbor as "right". + */ + if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp, + XFS_ALLOC_BTREE_REF))) + return error; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); + if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) + return error; + /* + * If it's full, it can't take another entry. 
+ */ + if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + *stat = 0; + return 0; + } + /* + * Make a hole at the start of the right neighbor block, then + * copy the last left block entry to the hole. + */ + if (level > 0) { + xfs_alloc_key_t *lkp; /* key pointer for left block */ + xfs_alloc_ptr_t *lpp; /* address pointer for left block */ + xfs_alloc_ptr_t *rpp; /* address pointer for right block */ + + lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur); + lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur); + rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); + rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); +#ifdef DEBUG + for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) { + if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) + return error; + } +#endif + memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); + memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); +#ifdef DEBUG + if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) + return error; +#endif + *rkp = *lkp; /* INT_: copy */ + *rpp = *lpp; /* INT_: copy */ + xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1); + xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1); + xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); + } else { + xfs_alloc_rec_t *lrp; /* record pointer for left block */ + xfs_alloc_rec_t *rrp; /* record pointer for right block */ + + lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur); + rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); + memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp)); + *rrp = *lrp; + xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1); + key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */ + key.ar_blockcount = rrp->ar_blockcount; /* INT_: 
direct copy */ + rkp = &key; + xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1); + } + /* + * Decrement and log left's numrecs, bump and log right's numrecs. + */ + INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1); + xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); + INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1); + xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); + /* + * Using a temporary cursor, update the parent key values of the + * block on the right. + */ + if ((error = xfs_btree_dup_cursor(cur, &tcur))) + return error; + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_alloc_increment(tcur, level, &i)) || + (error = xfs_alloc_updkey(tcur, rkp, level + 1))) + goto error0; + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + *stat = 1; + return 0; +error0: + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Split cur/level block in half. + * Return new block number and its first record (to be inserted into parent). + */ +STATIC int /* error */ +xfs_alloc_split( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level to split */ + xfs_agblock_t *bnop, /* output: block number allocated */ + xfs_alloc_key_t *keyp, /* output: first key of new block */ + xfs_btree_cur_t **curp, /* output: new cursor */ + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int i; /* loop index/record number */ + xfs_agblock_t lbno; /* left (current) block number */ + xfs_buf_t *lbp; /* buffer for left block */ + xfs_alloc_block_t *left; /* left (current) btree block */ + xfs_agblock_t rbno; /* right (new) block number */ + xfs_buf_t *rbp; /* buffer for right block */ + xfs_alloc_block_t *right; /* right (new) btree block */ + + /* + * Allocate the new block from the freelist. + * If we can't do it, we're toast. Give up. 
+ */ + if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + &rbno))) + return error; + if (rbno == NULLAGBLOCK) { + *stat = 0; + return 0; + } + xfs_trans_agbtree_delta(cur->bc_tp, 1); + rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno, + rbno, 0); + /* + * Set up the new block as "right". + */ + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); + /* + * "Left" is the current (according to the cursor) block. + */ + lbp = cur->bc_bufs[level]; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) + return error; +#endif + /* + * Fill in the btree header for the new block. + */ + INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]); + right->bb_level = left->bb_level; /* INT_: direct copy */ + INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2)); + /* + * Make sure that if there's an odd number of entries now, that + * each new block will have the same number of entries. + */ + if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) && + cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1) + INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1); + i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1; + /* + * For non-leaf blocks, copy keys and addresses over to the new block. 
+ */ + if (level > 0) { + xfs_alloc_key_t *lkp; /* left btree key pointer */ + xfs_alloc_ptr_t *lpp; /* left btree address pointer */ + xfs_alloc_key_t *rkp; /* right btree key pointer */ + xfs_alloc_ptr_t *rpp; /* right btree address pointer */ + + lkp = XFS_ALLOC_KEY_ADDR(left, i, cur); + lpp = XFS_ALLOC_PTR_ADDR(left, i, cur); + rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); + rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); +#ifdef DEBUG + for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) { + if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))) + return error; + } +#endif + memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */ + memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); /* INT_: copy */ + xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + *keyp = *rkp; + } + /* + * For leaf blocks, copy records over to the new block. + */ + else { + xfs_alloc_rec_t *lrp; /* left btree record pointer */ + xfs_alloc_rec_t *rrp; /* right btree record pointer */ + + lrp = XFS_ALLOC_REC_ADDR(left, i, cur); + rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); + memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp)); + xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */ + keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */ + } + /* + * Find the left block number by looking in the buffer. + * Adjust numrecs, sibling pointers. 
+ */ + lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp)); + INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT))); + right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */ + INT_SET(left->bb_rightsib, ARCH_CONVERT, rbno); + INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno); + xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS); + xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + /* + * If there's a block to the new block's right, make that block + * point back to right instead of to left. + */ + if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_alloc_block_t *rrblock; /* rr btree block */ + xfs_buf_t *rrbp; /* buffer for rrblock */ + + if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agno, INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, + &rrbp, XFS_ALLOC_BTREE_REF))) + return error; + rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); + if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) + return error; + INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, rbno); + xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); + } + /* + * If the cursor is really in the right block, move it there. + * If it's just pointing past the last entry in left, then we'll + * insert there, so don't change anything in that case. + */ + if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) { + xfs_btree_setbuf(cur, level, rbp); + cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT); + } + /* + * If there are more levels, we'll need another cursor which refers to + * the right block, no matter where this cursor was. + */ + if (level + 1 < cur->bc_nlevels) { + if ((error = xfs_btree_dup_cursor(cur, curp))) + return error; + (*curp)->bc_ptrs[level + 1]++; + } + *bnop = rbno; + *stat = 1; + return 0; +} + +/* + * Update keys at all levels from here to the root along the cursor's path. 
+ */ +STATIC int /* error */ +xfs_alloc_updkey( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_alloc_key_t *keyp, /* new key value to update to */ + int level) /* starting level for update */ +{ + int ptr; /* index of key in block */ + + /* + * Go up the tree from this level toward the root. + * At each level, update the key value to the value input. + * Stop when we reach a level where the cursor isn't pointing + * at the first entry in the block. + */ + for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { + xfs_alloc_block_t *block; /* btree block */ + xfs_buf_t *bp; /* buffer for block */ +#ifdef DEBUG + int error; /* error return value */ +#endif + xfs_alloc_key_t *kp; /* ptr to btree block keys */ + + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, block, level, bp))) + return error; +#endif + ptr = cur->bc_ptrs[level]; + kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); + *kp = *keyp; + xfs_alloc_log_keys(cur, bp, ptr, ptr); + } + return 0; +} + +/* + * Externally visible routines. + */ + +/* + * Decrement cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_alloc_decrement( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level in btree, 0 is leaf */ + int *stat) /* success/failure */ +{ + xfs_alloc_block_t *block; /* btree block */ + int error; /* error return value */ + int lev; /* btree level */ + + ASSERT(level < cur->bc_nlevels); + /* + * Read-ahead to the left at this level. + */ + xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + /* + * Decrement the ptr at this level. If we're still in the block + * then we're done. + */ + if (--cur->bc_ptrs[level] > 0) { + *stat = 1; + return 0; + } + /* + * Get a pointer to the btree block. 
+ */ + block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, block, level, + cur->bc_bufs[level]))) + return error; +#endif + /* + * If we just went off the left edge of the tree, return failure. + */ + if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) { + *stat = 0; + return 0; + } + /* + * March up the tree decrementing pointers. + * Stop when we don't go off the left edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + if (--cur->bc_ptrs[lev] > 0) + break; + /* + * Read-ahead the left block, we're going to read it + * in the next loop. + */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); + } + /* + * If we went off the root then we are seriously confused. + */ + ASSERT(lev < cur->bc_nlevels); + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) { + xfs_agblock_t agbno; /* block number of btree block */ + xfs_buf_t *bp; /* buffer pointer for block */ + + agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT); + if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agno, agbno, 0, &bp, + XFS_ALLOC_BTREE_REF))) + return error; + lev--; + xfs_btree_setbuf(cur, lev, bp); + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) + return error; + cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT); + } + *stat = 1; + return 0; +} + +/* + * Delete the record pointed to by cur. + * The cursor refers to the place where the record was (could be inserted) + * when the operation returns. + */ +int /* error */ +xfs_alloc_delete( + xfs_btree_cur_t *cur, /* btree cursor */ + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int i; /* result code */ + int level; /* btree level */ + + /* + * Go up the tree, starting at leaf level. 
+ * If 2 is returned then a join was done; go to the next level. + * Otherwise we are done. + */ + for (level = 0, i = 2; i == 2; level++) { + if ((error = xfs_alloc_delrec(cur, level, &i))) + return error; + } + if (i == 0) { + for (level = 1; level < cur->bc_nlevels; level++) { + if (cur->bc_ptrs[level] == 0) { + if ((error = xfs_alloc_decrement(cur, level, &i))) + return error; + break; + } + } + } + *stat = i; + return 0; +} + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_alloc_get_rec( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat) /* output: success/failure */ +{ + xfs_alloc_block_t *block; /* btree block */ +#ifdef DEBUG + int error; /* error return value */ +#endif + int ptr; /* record number */ + + ptr = cur->bc_ptrs[0]; + block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); +#ifdef DEBUG + if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) + return error; +#endif + /* + * Off the right end or left end, return failure. + */ + if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) { + *stat = 0; + return 0; + } + /* + * Point to the record and extract its data. + */ + { + xfs_alloc_rec_t *rec; /* record data */ + + rec = XFS_ALLOC_REC_ADDR(block, ptr, cur); + *bno = INT_GET(rec->ar_startblock, ARCH_CONVERT); + *len = INT_GET(rec->ar_blockcount, ARCH_CONVERT); + } + *stat = 1; + return 0; +} + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. 
+ */
+/*
+ * Outcomes of xfs_alloc_increment(), as implemented below:
+ *   return 0, *stat = 1 - the cursor now points at the next record;
+ *   return 0, *stat = 0 - the block at this level has no right sibling, so
+ *                         there is no next record (bc_ptrs[level] has already
+ *                         been incremented when this is detected);
+ *   return nonzero      - error from xfs_btree_check_sblock() or
+ *                         xfs_btree_read_bufs(); *stat is not written.
+ */
+int					/* error */
+xfs_alloc_increment(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		level,	/* level in btree, 0 is leaf */
+	int		*stat)	/* success/failure */
+{
+	xfs_alloc_block_t *block;	/* btree block */
+	xfs_buf_t	*bp;	/* tree block buffer */
+	int		error;	/* error return value */
+	int		lev;	/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/*
+	 * Read-ahead to the right at this level.
+	 */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
+		return error;
+#endif
+	/*
+	 * Increment the ptr at this level.  If we're still in the block
+	 * then we're done.
+	 */
+	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * If we just went off the right edge of the tree, return failure.
+	 */
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree incrementing pointers.
+	 * Stop when we don't go off the right edge of a block.
+	 * On exit, lev is the lowest ancestor level whose incremented ptr
+	 * is still within its block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		bp = cur->bc_bufs[lev];
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
+			return error;
+#endif
+		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			break;
+		/*
+		 * Read-ahead the right block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, fixing up the cursor's buffer
+	 * pointers and key numbers.
+	 * Each pass reads the child named by the current ptr at lev, installs
+	 * it one level lower and points the cursor at that block's first entry.
+	 */
+	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+	     lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+
+		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, agbno, 0, &bp,
+				XFS_ALLOC_BTREE_REF)))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+		/*
+		 * Unlike the checks above, this one is not DEBUG-only:
+		 * the block was just read in.
+		 */
+		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
+			return error;
+		cur->bc_ptrs[lev] = 1;
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ *
+ * The record inserted is taken from cur->bc_rec.a.  On success returns 0
+ * and sets *stat to the result value of the last xfs_alloc_insrec() call;
+ * on failure returns that call's error and leaves *stat unwritten.
+ */
+int					/* error */
+xfs_alloc_insert(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		*stat)	/* success/failure */
+{
+	int		error;	/* error return value */
+	int		i;	/* result value, 0 for failure */
+	int		level;	/* current level number in btree */
+	xfs_agblock_t	nbno;	/* new block number (split result) */
+	xfs_btree_cur_t	*ncur;	/* new cursor (split result) */
+	xfs_alloc_rec_t	nrec;	/* record being inserted this level */
+	xfs_btree_cur_t	*pcur;	/* previous level's cursor */
+
+	level = 0;
+	nbno = NULLAGBLOCK;
+	INT_SET(nrec.ar_startblock, ARCH_CONVERT, cur->bc_rec.a.ar_startblock);
+	INT_SET(nrec.ar_blockcount, ARCH_CONVERT, cur->bc_rec.a.ar_blockcount);
+	ncur = (xfs_btree_cur_t *)0;
+	pcur = cur;
+	/*
+	 * Loop going up the tree, starting at the leaf level.
+	 * Stop when we don't get a split block, that must mean that
+	 * the insert is finished with this level.
+	 */
+	do {
+		/*
+		 * Insert nrec/nbno into this level of the tree.
+		 * Note if we fail, nbno will be null.
+		 * level is post-incremented, so the next pass works one
+		 * level further up.
+		 */
+		if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
+				&i))) {
+			/* Only cursors created along the way are ours to free. */
+			if (pcur != cur)
+				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+			return error;
+		}
+		/*
+		 * See if the cursor we just used is trash.
+		 * Can't trash the caller's cursor, but otherwise we should
+		 * if ncur is a new cursor or we're about to be done.
+		 */
+		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
+			/* Copy the level count back before discarding pcur. */
+			cur->bc_nlevels = pcur->bc_nlevels;
+			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+		}
+		/*
+		 * If we got a new cursor, switch to it.
+		 */
+		if (ncur) {
+			pcur = ncur;
+			ncur = (xfs_btree_cur_t *)0;
+		}
+	} while (nbno != NULLAGBLOCK);
+	*stat = i;
+	return 0;
+}
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ * Stores [bno, len] in cur->bc_rec.a and returns the result of
+ * xfs_alloc_lookup() with XFS_LOOKUP_EQ.
+ */
+int					/* error */
+xfs_alloc_lookup_eq(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_agblock_t	bno,	/* starting block of extent */
+	xfs_extlen_t	len,	/* length of extent */
+	int		*stat)	/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ * Stores [bno, len] in cur->bc_rec.a and returns the result of
+ * xfs_alloc_lookup() with XFS_LOOKUP_GE.
+ */
+int					/* error */
+xfs_alloc_lookup_ge(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_agblock_t	bno,	/* starting block of extent */
+	xfs_extlen_t	len,	/* length of extent */
+	int		*stat)	/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ * Stores [bno, len] in cur->bc_rec.a and returns the result of
+ * xfs_alloc_lookup() with XFS_LOOKUP_LE.
+ */
+int					/* error */
+xfs_alloc_lookup_le(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_agblock_t	bno,	/* starting block of extent */
+	xfs_extlen_t	len,	/* length of extent */
+	int		*stat)	/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur, to the value given by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int					/* error */
+xfs_alloc_update(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_agblock_t	bno,	/* starting block of extent */
+	xfs_extlen_t	len)	/* length of extent */
+{
+	xfs_alloc_block_t *block;	/* btree block to update */
+	int		error;	/* error return value */
+	int		ptr;	/* current record number (updating) */
+
+	ASSERT(len > 0);
+	/*
+	 * Pick up the current leaf block (level 0 of the cursor).
+	 */
+	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
+#ifdef DEBUG
+	if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
+		return error;
+#endif
+	/*
+	 * Get the address of the rec to be updated.
+	 */
+	ptr = cur->bc_ptrs[0];
+	{
+		xfs_alloc_rec_t	*rp;	/* pointer to updated record */
+
+		rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+		/*
+		 * Fill in the new contents and log them.
+		 */
+		INT_SET(rp->ar_startblock, ARCH_CONVERT, bno);
+		INT_SET(rp->ar_blockcount, ARCH_CONVERT, len);
+		xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
+	}
+	/*
+	 * If it's the by-size btree and it's the last leaf block and
+	 * it's the last record... then update the size of the longest
+	 * extent in the a.g., which we cache in the a.g. freelist header.
+	 */
+	if (cur->bc_btnum == XFS_BTNUM_CNT &&
+	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+	    ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		xfs_agf_t	*agf;	/* a.g. freespace header */
+		xfs_agnumber_t	seqno;
+
+		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		/*
+		 * Set both the per-AG entry in the mount structure and the
+		 * AGF buffer field to len, then log the AGF change.
+		 */
+		cur->bc_mp->m_perag[seqno].pagf_longest = len;
+		INT_SET(agf->agf_longest, ARCH_CONVERT, len);
+		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+			XFS_AGF_LONGEST);
+	}
+	/*
+	 * Updating first record in leaf. Pass new key value up to our parent.
+	 */
+	if (ptr == 1) {
+		xfs_alloc_key_t	key;	/* key containing [bno, len] */
+
+		INT_SET(key.ar_startblock, ARCH_CONVERT, bno);
+		INT_SET(key.ar_blockcount, ARCH_CONVERT, len);
+		if ((error = xfs_alloc_updkey(cur, &key, 1)))
+			return error;
+	}
+	return 0;
+}
diff --git a/sys/gnu/fs/xfs/xfs_alloc_btree.h b/sys/gnu/fs/xfs/xfs_alloc_btree.h
new file mode 100644
index 000000000000..ff2a216d39f2
--- /dev/null
+++ b/sys/gnu/fs/xfs/xfs_alloc_btree.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ALLOC_BTREE_H__
+#define __XFS_ALLOC_BTREE_H__
+
+/*
+ * Freespace on-disk structures
+ */
+
+/* Forward declarations: this header only uses pointers to these types. */
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_btree_sblock;
+struct xfs_mount;
+
+/*
+ * There are two on-disk btrees, one sorted by blockno and one sorted
+ * by blockcount and blockno. All blocks look the same to make the code
+ * simpler; if we have time later, we'll make the optimizations.
+ */
+#define XFS_ABTB_MAGIC	0x41425442	/* 'ABTB' for bno tree */
+#define XFS_ABTC_MAGIC	0x41425443	/* 'ABTC' for cnt tree */
+
+/*
+ * Data record/key structure
+ * xfs_alloc_rec_t and xfs_alloc_key_t name the same struct type, so a
+ * record and a key have identical layout.
+ */
+typedef struct xfs_alloc_rec
+{
+	xfs_agblock_t	ar_startblock;	/* starting block number */
+	xfs_extlen_t	ar_blockcount;	/* count of free blocks */
+} xfs_alloc_rec_t, xfs_alloc_key_t;
+
+typedef xfs_agblock_t xfs_alloc_ptr_t;	/* btree pointer type */
+					/* btree block header type */
+typedef struct xfs_btree_sblock xfs_alloc_block_t;
+
+/*
+ * XFS_BUF_TO_ALLOC_BLOCK(bp): view a buffer's data as a btree block header.
+ * Depending on the XFS_WANT_* configuration macros this expands either to
+ * a call to xfs_buf_to_alloc_block() or to a cast of XFS_BUF_PTR(bp).
+ */
+#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_ALLOC_BLOCK)
+xfs_alloc_block_t *xfs_buf_to_alloc_block(struct xfs_buf *bp);
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_ALLOC_BLOCK)
+#define XFS_BUF_TO_ALLOC_BLOCK(bp)	xfs_buf_to_alloc_block(bp)
+#else
+#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+/*
+ * Real block structures have a size equal to the disk block size.
+ */ + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_SIZE) +int xfs_alloc_block_size(int lev, struct xfs_btree_cur *cur); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_SIZE) +#define XFS_ALLOC_BLOCK_SIZE(lev,cur) xfs_alloc_block_size(lev,cur) +#else +#define XFS_ALLOC_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MAXRECS) +int xfs_alloc_block_maxrecs(int lev, struct xfs_btree_cur *cur); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MAXRECS) +#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) xfs_alloc_block_maxrecs(lev,cur) +#else +#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) \ + ((cur)->bc_mp->m_alloc_mxr[lev != 0]) +#endif + +#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MINRECS) +int xfs_alloc_block_minrecs(int lev, struct xfs_btree_cur *cur); +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MINRECS) +#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) xfs_alloc_block_minrecs(lev,cur) +#else +#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) \ + ((cur)->bc_mp->m_alloc_mnr[lev != 0]) +#endif + +/* + * Minimum and maximum blocksize and sectorsize. + * The blocksize upper limit is pretty much arbitrary. + * The sectorsize upper limit is due to sizeof(sb_sectsize). + */ +#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ +#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) +#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ +#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) +#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) + +/* + * Block numbers in the AG: + * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. 
+ */
+/* XFS_BNO_BLOCK(mp): the block immediately after XFS_AGFL_BLOCK(mp). */
+#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_BNO_BLOCK)
+xfs_agblock_t xfs_bno_block(struct xfs_mount *mp);
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BNO_BLOCK)
+#define XFS_BNO_BLOCK(mp)	xfs_bno_block(mp)
+#else
+#define XFS_BNO_BLOCK(mp)	((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
+#endif
+
+/* XFS_CNT_BLOCK(mp): the block immediately after XFS_BNO_BLOCK(mp). */
+#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_CNT_BLOCK)
+xfs_agblock_t xfs_cnt_block(struct xfs_mount *mp);
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CNT_BLOCK)
+#define XFS_CNT_BLOCK(mp)	xfs_cnt_block(mp)
+#else
+#define XFS_CNT_BLOCK(mp)	((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
+#endif
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ * i is the entry number within block bb.  The record macro uses the
+ * level-0 block size and maxrecs; the key and pointer macros use the
+ * level-1 values.
+ */
+#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_REC_ADDR)
+xfs_alloc_rec_t *xfs_alloc_rec_addr(xfs_alloc_block_t *bb, int i,
+	struct xfs_btree_cur *cur);
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_REC_ADDR)
+#define XFS_ALLOC_REC_ADDR(bb,i,cur)	xfs_alloc_rec_addr(bb,i,cur)
+#else
+#define XFS_ALLOC_REC_ADDR(bb,i,cur)	\
+	XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, bb, i, \
+		XFS_ALLOC_BLOCK_MAXRECS(0, cur))
+#endif
+
+#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_KEY_ADDR)
+xfs_alloc_key_t *xfs_alloc_key_addr(xfs_alloc_block_t *bb, int i,
+	struct xfs_btree_cur *cur);
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_KEY_ADDR)
+#define XFS_ALLOC_KEY_ADDR(bb,i,cur)	xfs_alloc_key_addr(bb,i,cur)
+#else
+#define XFS_ALLOC_KEY_ADDR(bb,i,cur)	\
+	XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
+		XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+#endif
+
+#if XFS_WANT_FUNCS || XFS_WANT_FUNCS_C || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_PTR_ADDR)
+xfs_alloc_ptr_t *xfs_alloc_ptr_addr(xfs_alloc_block_t *bb, int i,
+	struct xfs_btree_cur *cur);
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_PTR_ADDR)
+#define XFS_ALLOC_PTR_ADDR(bb,i,cur)	xfs_alloc_ptr_addr(bb,i,cur)
+#else
+#define XFS_ALLOC_PTR_ADDR(bb,i,cur)	\
+	XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
+		XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+#endif
+
+/*
+ * Prototypes for externally visible routines.
+ * Each returns an error code; routines with a stat argument report
+ * success/failure of the operation itself through *stat.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_alloc_decrement(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat);	/* success/failure */
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int					/* error */
+xfs_alloc_delete(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*stat);	/* success/failure */
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int					/* error */
+xfs_alloc_get_rec(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		*bno,	/* output: starting block of extent */
+	xfs_extlen_t		*len,	/* output: length of extent */
+	int			*stat);	/* output: success/failure */
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_alloc_increment(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat);	/* success/failure */
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int					/* error */
+xfs_alloc_insert(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*stat);	/* success/failure */
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */ +int /* error */ +xfs_alloc_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +/* + * Lookup the first record greater than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +/* + * Lookup the first record less than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +/* + * Update the record referred to by cur, to the value given by [bno, len]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +int /* error */ +xfs_alloc_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len); /* length of extent */ + +#endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/sys/gnu/fs/xfs/xfs_arch.h b/sys/gnu/fs/xfs/xfs_arch.h new file mode 100644 index 000000000000..ea3c4f4a0a91 --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_arch.h @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ARCH_H__ +#define __XFS_ARCH_H__ + +#ifndef XFS_BIG_INUMS +# error XFS_BIG_INUMS must be defined true or false +#endif + +#ifdef __KERNEL__ + +#include <sys/endian.h> + +#define __LITTLE_ENDIAN _LITTLE_ENDIAN +#define __BIG_ENDIAN _BIG_ENDIAN +#define __BYTE_ORDER _BYTE_ORDER + +/* Compatibiliy defines */ +#define __swab16 __bswap16 +#define __swab32 __bswap32 +#define __swab64 __bswap64 +#endif /* __KERNEL__ */ + +/* do we need conversion? */ + +#define ARCH_NOCONVERT 1 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ARCH_CONVERT 0 +#else +# define ARCH_CONVERT ARCH_NOCONVERT +#endif + +/* generic swapping macros */ + +#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var)))) +#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var)))) +#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var)))) + +#define INT_SWAP(type, var) \ + ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \ + ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \ + ((sizeof(type) == 2) ? 
INT_SWAP16(type,var) : \ + (var)))) + +#define INT_SWAP_UNALIGNED_32(from,to) \ + { \ + ((__u8*)(to))[0] = ((__u8*)(from))[3]; \ + ((__u8*)(to))[1] = ((__u8*)(from))[2]; \ + ((__u8*)(to))[2] = ((__u8*)(from))[1]; \ + ((__u8*)(to))[3] = ((__u8*)(from))[0]; \ + } + +#define INT_SWAP_UNALIGNED_64(from,to) \ + { \ + INT_SWAP_UNALIGNED_32( ((__u8*)(from)) + 4, ((__u8*)(to))); \ + INT_SWAP_UNALIGNED_32( ((__u8*)(from)), ((__u8*)(to)) + 4); \ + } + +/* + * get and set integers from potentially unaligned locations + */ + +#define INT_GET_UNALIGNED_16_LE(pointer) \ + ((__u16)((((__u8*)(pointer))[0] ) | (((__u8*)(pointer))[1] << 8 ))) +#define INT_GET_UNALIGNED_16_BE(pointer) \ + ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1]))) +#define INT_SET_UNALIGNED_16_LE(pointer,value) \ + { \ + ((__u8*)(pointer))[0] = (((value) ) & 0xff); \ + ((__u8*)(pointer))[1] = (((value) >> 8) & 0xff); \ + } +#define INT_SET_UNALIGNED_16_BE(pointer,value) \ + { \ + ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \ + ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ + } + +#define INT_GET_UNALIGNED_32_LE(pointer) \ + ((__u32)((((__u8*)(pointer))[0] ) | (((__u8*)(pointer))[1] << 8 ) \ + |(((__u8*)(pointer))[2] << 16) | (((__u8*)(pointer))[3] << 24))) +#define INT_GET_UNALIGNED_32_BE(pointer) \ + ((__u32)((((__u8*)(pointer))[0] << 24) | (((__u8*)(pointer))[1] << 16) \ + |(((__u8*)(pointer))[2] << 8) | (((__u8*)(pointer))[3] ))) +#define INT_SET_UNALIGNED_32_LE(pointer, value) \ + { \ + INT_SET_UNALIGNED_16_LE(pointer, \ + ((value) & 0xffff)); \ + INT_SET_UNALIGNED_16_LE(((__u8*)(pointer))+2, \ + (((value) >> 16) & 0xffff) ); \ + } +#define INT_SET_UNALIGNED_32_BE(pointer, value) \ + { \ + INT_SET_UNALIGNED_16_BE(pointer, \ + (((value) >> 16) & 0xffff) ); \ + INT_SET_UNALIGNED_16_BE(((__u8*)(pointer))+2, \ + ((value) & 0xffff) ); \ + } + +#define INT_GET_UNALIGNED_64_LE(pointer) \ + (((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))+4)) << 32 ) \ + 
|((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer)) )) )) +#define INT_GET_UNALIGNED_64_BE(pointer) \ + (((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer)) )) << 32 ) \ + |((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))+4)) )) +#define INT_SET_UNALIGNED_64_LE(pointer, value) \ + { \ + INT_SET_UNALIGNED_32_LE(pointer, \ + ((value) & 0xffffffff)); \ + INT_SET_UNALIGNED_32_LE(((__u8*)(pointer))+4, \ + (((value) >> 32) & 0xffffffff) ); \ + } +#define INT_SET_UNALIGNED_64_BE(pointer, value) \ + { \ + INT_SET_UNALIGNED_32_BE(pointer, \ + (((value) >> 16) & 0xffff) ); \ + INT_SET_UNALIGNED_32_BE(((__u8*)(pointer))+4, \ + ((value) & 0xffff) ); \ + } + +/* + * now pick the right ones for our MACHINE ARCHITECTURE + */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define INT_GET_UNALIGNED_16(pointer) INT_GET_UNALIGNED_16_LE(pointer) +#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_LE(pointer,value) +#define INT_GET_UNALIGNED_32(pointer) INT_GET_UNALIGNED_32_LE(pointer) +#define INT_SET_UNALIGNED_32(pointer,value) INT_SET_UNALIGNED_32_LE(pointer,value) +#define INT_GET_UNALIGNED_64(pointer) INT_GET_UNALIGNED_64_LE(pointer) +#define INT_SET_UNALIGNED_64(pointer,value) INT_SET_UNALIGNED_64_LE(pointer,value) +#else +#define INT_GET_UNALIGNED_16(pointer) INT_GET_UNALIGNED_16_BE(pointer) +#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_BE(pointer,value) +#define INT_GET_UNALIGNED_32(pointer) INT_GET_UNALIGNED_32_BE(pointer) +#define INT_SET_UNALIGNED_32(pointer,value) INT_SET_UNALIGNED_32_BE(pointer,value) +#define INT_GET_UNALIGNED_64(pointer) INT_GET_UNALIGNED_64_BE(pointer) +#define INT_SET_UNALIGNED_64(pointer,value) INT_SET_UNALIGNED_64_BE(pointer,value) +#endif + +/* define generic INT_ macros */ + +#define INT_GET(reference,arch) \ + (((arch) == ARCH_NOCONVERT) \ + ? 
\ + (reference) \ + : \ + INT_SWAP((reference),(reference)) \ + ) + +/* does not return a value */ +#define INT_SET(reference,arch,valueref) \ + (__builtin_constant_p(valueref) ? \ + (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \ + (void)( \ + ((reference) = (valueref)), \ + ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \ + ) \ + ) + +/* does not return a value */ +#define INT_MOD_EXPR(reference,arch,code) \ + (void)(((arch) == ARCH_NOCONVERT) \ + ? \ + ((reference) code) \ + : \ + ( \ + (reference) = INT_GET((reference),arch) , \ + ((reference) code), \ + INT_SET(reference, arch, reference) \ + ) \ + ) + +/* does not return a value */ +#define INT_MOD(reference,arch,delta) \ + (void)( \ + INT_MOD_EXPR(reference,arch,+=(delta)) \ + ) + +/* + * INT_COPY - copy a value between two locations with the + * _same architecture_ but _potentially different sizes_ + * + * if the types of the two parameters are equal or they are + * in native architecture, a simple copy is done + * + * otherwise, architecture conversions are done + * + */ + +/* does not return a value */ +#define INT_COPY(dst,src,arch) \ + (void)( \ + ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \ + ? \ + ((dst) = (src)) \ + : \ + INT_SET(dst, arch, INT_GET(src, arch)) \ + ) + +/* + * INT_XLATE - copy a value in either direction between two locations + * with different architectures + * + * dir < 0 - copy from memory to buffer (native to arch) + * dir > 0 - copy from buffer to memory (arch to native) + */ + +/* does not return a value */ +#define INT_XLATE(buf,mem,dir,arch) {\ + ASSERT(dir); \ + if (dir>0) { \ + (mem)=INT_GET(buf, arch); \ + } else { \ + INT_SET(buf, arch, mem); \ + } \ +} + +#define INT_ISZERO(reference,arch) \ + ((reference) == 0) + +#define INT_ZERO(reference,arch) \ + ((reference) = 0) + +#define INT_GET_UNALIGNED_16_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? 
\ + (INT_GET_UNALIGNED_16(pointer)) \ + : \ + (INT_GET_UNALIGNED_16_BE(pointer)) \ + ) +#define INT_SET_UNALIGNED_16_ARCH(pointer,value,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + INT_SET_UNALIGNED_16(pointer,value); \ + } else { \ + INT_SET_UNALIGNED_16_BE(pointer,value); \ + } + +#define INT_GET_UNALIGNED_64_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? \ + (INT_GET_UNALIGNED_64(pointer)) \ + : \ + (INT_GET_UNALIGNED_64_BE(pointer)) \ + ) +#define INT_SET_UNALIGNED_64_ARCH(pointer,value,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + INT_SET_UNALIGNED_64(pointer,value); \ + } else { \ + INT_SET_UNALIGNED_64_BE(pointer,value); \ + } + +#define DIRINO4_GET_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? \ + (INT_GET_UNALIGNED_32(pointer)) \ + : \ + (INT_GET_UNALIGNED_32_BE(pointer)) \ + ) + +#if XFS_BIG_INUMS +#define DIRINO_GET_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? \ + (INT_GET_UNALIGNED_64(pointer)) \ + : \ + (INT_GET_UNALIGNED_64_BE(pointer)) \ + ) +#else +/* MACHINE ARCHITECTURE dependent */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define DIRINO_GET_ARCH(pointer,arch) \ + DIRINO4_GET_ARCH((((__u8*)pointer)+4),arch) +#else +#define DIRINO_GET_ARCH(pointer,arch) \ + DIRINO4_GET_ARCH(pointer,arch) +#endif +#endif + +#define DIRINO_COPY_ARCH(from,to,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + memcpy(to,from,sizeof(xfs_ino_t)); \ + } else { \ + INT_SWAP_UNALIGNED_64(from,to); \ + } +#define DIRINO4_COPY_ARCH(from,to,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + memcpy(to,(((__u8*)from+4)),sizeof(xfs_dir2_ino4_t)); \ + } else { \ + INT_SWAP_UNALIGNED_32(from,to); \ + } + +#endif /* __XFS_ARCH_H__ */ + diff --git a/sys/gnu/fs/xfs/xfs_attr.c b/sys/gnu/fs/xfs/xfs_attr.c new file mode 100644 index 000000000000..0399c456165f --- /dev/null +++ b/sys/gnu/fs/xfs/xfs_attr.c @@ -0,0 +1,2687 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode_item.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_error.h" +#include "xfs_bit.h" +#include "xfs_quota.h" +#include "xfs_rw.h" +#include "xfs_trans_space.h" +#include "xfs_acl.h" + +/* + * xfs_attr.c + * + * Provide the external interfaces to manage attribute lists. + */ + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Internal routines when attribute list fits inside the inode. + */ +STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); + +/* + * Internal routines when attribute list is one block. + */ +STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); +STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); +STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context); + +/* + * Internal routines when attribute list is more than one block. 
+ */ +STATIC int xfs_attr_node_addname(xfs_da_args_t *args); +STATIC int xfs_attr_node_removename(xfs_da_args_t *args); +STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context); +STATIC int xfs_attr_fillstate(xfs_da_state_t *state); +STATIC int xfs_attr_refillstate(xfs_da_state_t *state); + +/* + * Routines to manipulate out-of-line attribute values. + */ +STATIC int xfs_attr_rmtval_get(xfs_da_args_t *args); +STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args); +STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args); + +#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ +#define ATTR_RMTVALUE_TRANSBLKS 8 /* max # of blks in a transaction */ + +#if defined(XFS_ATTR_TRACE) +ktrace_t *xfs_attr_trace_buf; +#endif + + +/*======================================================================== + * Overall external interface routines. + *========================================================================*/ + +/*ARGSUSED*/ +STATIC int +xfs_attr_get_int(xfs_inode_t *ip, const char *name, char *value, int *valuelenp, + int flags, int lock, struct cred *cred) +{ + xfs_da_args_t args; + int error; + int namelen; + + ASSERT(MAXNAMELEN-1 <= 0xff); /* length is stored in uint8 */ + namelen = strlen(name); + if (namelen >= MAXNAMELEN) + return(EFAULT); /* match IRIX behaviour */ + XFS_STATS_INC(xs_attr_get); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return(EIO); + + if ((XFS_IFORK_Q(ip) == 0) || + (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_anextents == 0)) + return(ENOATTR); + + if (lock) { + xfs_ilock(ip, XFS_ILOCK_SHARED); + /* + * Do we answer them, or ignore them? + */ + if ((error = xfs_iaccess(ip, S_IRUSR, cred))) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); |